library(magrittr)
foo_foo <- little_bunny()
## Error in little_bunny(): could not find function "little_bunny"
foo_foo_1 <- hop(foo_foo, through = forest)
## Error in hop(foo_foo, through = forest): could not find function "hop"
foo_foo_2 <- scoop(foo_foo_1, up = field_mice)
## Error in scoop(foo_foo_1, up = field_mice): could not find function "scoop"
foo_foo_3 <- bop(foo_foo_2, on = head)
## Error in bop(foo_foo_2, on = head): could not find function "bop"
diamonds <- ggplot2::diamonds
diamonds2 <- diamonds %>%
dplyr::mutate(price_per_carat = price / carat)
pryr::object_size(diamonds)
## 3.46 MB
pryr::object_size(diamonds2)
## 3.89 MB
pryr::object_size(diamonds, diamonds2)
## 3.89 MB
diamonds$carat[1] <- NA
pryr::object_size(diamonds)
## 3.46 MB
pryr::object_size(diamonds2)
## 3.89 MB
pryr::object_size(diamonds, diamonds2)
## 4.32 MB
foo_foo <- hop(foo_foo, through = forest)
## Error in hop(foo_foo, through = forest): could not find function "hop"
foo_foo <- scoop(foo_foo, up = field_mice)
## Error in scoop(foo_foo, up = field_mice): could not find function "scoop"
foo_foo <- bop(foo_foo, on = head)
## Error in bop(foo_foo, on = head): could not find function "bop"
bop(
scoop(
hop(foo_foo, through = forest),
up = field_mice
),
on = head
)
## Error in bop(scoop(hop(foo_foo, through = forest), up = field_mice), on = head): could not find function "bop"
foo_foo %>%
hop(through = forest) %>%
scoop(up = field_mouse) %>%
bop(on = head)
## Error in eval(lhs, parent, parent): object 'foo_foo' not found
my_pipe <- function(.) {
. <- hop(., through = forest)
. <- scoop(., up = field_mice)
bop(., on = head)
}
my_pipe(foo_foo)
## Error in hop(., through = forest): could not find function "hop"
assign("x", 10)
x
## [1] 10
"x" %>% assign(100)
x
## [1] 10
env <- environment()
"x" %>% assign(100, envir = env)
x
## [1] 100
tryCatch(stop("!"), error = function(e) "An error")
## [1] "An error"
stop("!") %>%
tryCatch(error = function(e) "An error")
## Error in eval(lhs, parent, parent): !
rnorm(100) %>%
matrix(ncol = 2) %>%
plot() %>%
str()
## NULL
rnorm(100) %>%
matrix(ncol = 2) %T>%
plot() %>%
str()
## num [1:50, 1:2] -0.5 0.1 1.11 -1.54 -1.15 ...
mtcars %$%
cor(disp, mpg)
## [1] -0.8475514
cor(mtcars$disp, mtcars$mpg)
## [1] -0.8475514
mtcars <- mtcars %>%
transform(cyl = cyl * 2)
mtcars %<>% transform(cyl = cyl * 2)
df <- tibble::tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
df$a <- (df$a - min(df$a, na.rm = TRUE)) /
(max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) /
(max(df$b, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) /
(max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) /
(max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))
x <- df$a
(x - min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
## [1] 0.6134160 1.0000000 0.6556065 0.7257814 0.0000000 0.6314405 0.2565740
## [8] 0.3184007 0.9185010 0.3450838
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
## [1] 0.6134160 1.0000000 0.6556065 0.7257814 0.0000000 0.6314405 0.2565740
## [8] 0.3184007 0.9185010 0.3450838
rescale01 <- function(x){
rng <- range(x, na.rm=TRUE)
(x - rng[1]) / (rng[2] - rng[1])
}
rescale01(c(0,5,10))
## [1] 0.0 0.5 1.0
rescale01(c(-10,0,10))
## [1] 0.0 0.5 1.0
rescale01(c(1,2,3,NA,5))
## [1] 0.00 0.25 0.50 NA 1.00
df$a <- rescale01(df$a)
df$b <- rescale01(df$b)
df$c <- rescale01(df$c)
df$d <- rescale01(df$d)
x <- c(1:10, Inf)
rescale01(x)
## [1] 0 0 0 0 0 0 0 0 0 0 NaN
rescale01 <- function(x){
rng <- range(x, na.rm = TRUE, finite = TRUE)
(x - rng[1])/(rng[2]-rng[1])
}
rescale01(x)
## [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
## [8] 0.7777778 0.8888889 1.0000000 Inf
f()
## Error in f(): could not find function "f"
my_awesome_function()
## Error in my_awesome_function(): could not find function "my_awesome_function"
impute_missing()
## Error in impute_missing(): could not find function "impute_missing"
collapse_years()
## Error in collapse_years(): could not find function "collapse_years"
col_mins <- function(x,y){}
rowMaxes <- function(x,y){}
input_select()
## Error in input_select(): could not find function "input_select"
input_checkbox()
## Error in input_checkbox(): could not find function "input_checkbox"
imput_text()
## Error in imput_text(): could not find function "imput_text"
select_input()
## Error in select_input(): could not find function "select_input"
checkbox_input()
## Error in checkbox_input(): could not find function "checkbox_input"
text_input()
## Error in text_input(): could not find function "text_input"
if(condition){
# code executed when conditio is TRUE
} else{
# code executed when condition is FALSE
}
## Error in eval(expr, envir, enclos): object 'condition' not found
has_name <- function(x){
nms <- name(x)
if(is.null(nms)){
rep(FALSE, length(x))
} else{
!is.na(nms) & nms!=""
}
}
if(c(TRUE, FALSE)){}
## Warning in if (c(TRUE, FALSE)) {: the condition has length > 1 and only the
## first element will be used
## NULL
if(NA){}
## Error in if (NA) {: missing value where TRUE/FALSE needed
identical(0L,0)
## [1] FALSE
x <- sqrt(2)^2
x
## [1] 2
x==2
## [1] FALSE
x-2
## [1] 4.440892e-16
if(this){
# do that
} else if (that){
# do something else
} else {
#
}
## Error in eval(expr, envir, enclos): object 'this' not found
function(x,y,op){
switch(op,
plus = x + y,
minus = x - y,
times = x * y,
divide = x / y,
stop("Unknown op!"))
}
## function(x,y,op){
## switch(op,
## plus = x + y,
## minus = x - y,
## times = x * y,
## divide = x / y,
## stop("Unknown op!"))
## }
if(y < 0 && debug){
message("Y is negative")
}
## Error in eval(expr, envir, enclos): object 'y' not found
if(y == 0){
log(x)
} else{
y ^ x
}
## Error in eval(expr, envir, enclos): object 'y' not found
y <- 10
x <- if(y<20) "Too low" else "Too high"
if(y < 20){
x <- "Too low"
} else{
x <- "Too high"
}
mean_ci <- function(x, conf = 0.95){
se <- sd(x) / sqrt(length(x))
alpha <- 1 - conf
mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}
x <- runif(100)
mean_ci(x)
## [1] 0.3918179 0.5042840
mean_ci(x, conf = 0.99)
## [1] 0.3741482 0.5219537
mean(1:10, na.rm = TRUE)
## [1] 5.5
# x,y,z: vectors
# w: a vector of weights
# df: a data frame
# i,j: numeric indices (typically rows and columns)
# n: length, or number of rows
# p: number of columns
wt_mean <- function(x, w){
sum(x * w) / sum(x)
}
wt_var <- function(x, w){
mu <- wt_mean(x, w)
sum(w * (x - mu) ^ 2) / sum(w)
}
wt_sd <- function(x, w){
sqrt(wt_var(x, w))
}
wt_mean(1:6, 1:3)
## [1] 2.190476
wt_mean <- function(x, w){
if(length(x) != length(w)){
stop("`x` and `w` must be the same length", call = FALSE)
}
sum(w*x) / sum(x)
}
wt_mean <- function(x, w, na.rm = FALSE){
if(!is.logical(na.rm)){
stop("`na.rm` must be logical")
}
if(length(na.rm) != 1){
stop("`na.rm` must be length 1")
}
if(length(x) != length(w)){
stop("`x` and `w` must be the same length", call.=FALSE)
}
if(na.rm){
miss <- is.na(x) | is.na(w)
x <- x[!miss]
w <- w[!miss]
}
sum(w*x) / sum(x)
}
wt_mean <- function(x, w, na.rm = FALSE){
stopifnot(is.logical(na.rm), length(na.rm) == 1)
stopifnot(length(x)==length(w))
if(na.rm){
miss <- is.na(x) | is.na(w)
x <- x[!miss]
w <- w[!miss]
}
sum(w * x) / sum(x)
}
wt_mean(1:6, 6:1, na.rm = "foo")
## Error in wt_mean(1:6, 6:1, na.rm = "foo"): is.logical(na.rm) is not TRUE
sum(1,2,3,4,5,6,7,8,9,10)
## [1] 55
stringr::str_c("a","b","c","d","e","f")
## [1] "abcdef"
commas <- function(...) stringr::str_c(..., collapse = ", ")
commas(letters[1:10])
## [1] "a, b, c, d, e, f, g, h, i, j"
rule <- function(..., pad = "-"){
title <- paste0(...)
width <- getOption("width") - nchar(title) - 5
cat(title, " ", stringr::str_dup(pad, width), "\n", sep="")
}
rule("Important output")
## Important output ------------------------------------------------------
x <- c(1,2)
sum(x, na.mr = TRUE)
## [1] 4
complicated_function <- function(x,y,z){
if(length(x) == 0 || length(y) == 0){
return(0)
}
# complicated code here
}
f <- function(){
if(x){
# Do
# something
# that
# takes
# many
# lines
# to
# express
}else{
# return something short
}
}
f <- funtion(){
if(!x){
return(something_short)
}
# Do
# something
# that
# takes
# many
# lines
# to
# express
}
## Error: <text>:1:15: unexpected '{'
## 1: f <- funtion(){
## ^
show_missings <- function(df){
n <- sum(is.na(df))
cat("Missing values: ", n, "\n", sep = "")
invisible(df)
}
show_missings(mtcars)
## Missing values: 0
x <- show_missings(mtcars)
## Missing values: 0
class(x)
## [1] "data.frame"
dim(x)
## [1] 32 11
mtcars %>%
show_missings() %>%
mutate(mpg = ifelse(mpg < 20, NA, mpg)) %>%
show_missings()
## Missing values: 0
## Error in mutate(., mpg = ifelse(mpg < 20, NA, mpg)): could not find function "mutate"
f <- function(x){
x + y
}
y <- 100
f(10)
## [1] 110
y <- 1000
f(10)
## [1] 1010
`+` <- function(x, y) {
if (runif(1) < 0.1) {
sum(x, y)
} else {
sum(x, y) * 1.1
}
}
table(replicate(1000, 1 + 2))
##
## 3 3.3
## 113 887
#>
#> 3 3.3
#> 100 900
rm(`+`)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ .GlobalEnv::has_name() masks tibble::has_name()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
typeof(letters)
## [1] "character"
typeof(1:10)
## [1] "integer"
x <- list("a","b",1:10)
length(x)
## [1] 3
1:10 %% 3 == 0
## [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
c(TRUE, TRUE, FALSE, NA)
## [1] TRUE TRUE FALSE NA
typeof(1)
## [1] "double"
typeof(1L)
## [1] "integer"
1.5L
## [1] 1.5
x <- sqrt(2) ^ 2
x
## [1] 2
x - 2
## [1] 4.440892e-16
c(-1, 0, 1) / 0
## [1] -Inf NaN Inf
is.finite(0)
## [1] TRUE
is.infinite(Inf)
## [1] TRUE
is.na(NA)
## [1] TRUE
is.nan(NaN)
## [1] TRUE
x <- "This is a reasonably long string"
pryr::object_size(x)
## 152 B
y <- rep(x,1000)
pryr::object_size(y)
## 8.14 kB
NA
## [1] NA
NA_integer_
## [1] NA
NA_real_
## [1] NA
NA_character_
## [1] NA
x <- sample(20, 100, replace = TRUE)
y <- x > 10
sum(y)
## [1] 52
mean(y)
## [1] 0.52
typeof(c(TRUE, 1L))
## [1] "integer"
typeof(c(1L, 1.5))
## [1] "double"
typeof(c(1.5, "a"))
## [1] "character"
is_logical()
## Error in is_logical(): argument "x" is missing, with no default
is_integer()
## Error in is_integer(): argument "x" is missing, with no default
is_double()
## Error in is_double(): argument "x" is missing, with no default
is_numeric()
## Warning: Deprecated
## Error in is_integer(x): argument "x" is missing, with no default
is_character()
## Error in is_character(): argument "x" is missing, with no default
is_atomic()
## Error in is_atomic(): argument "x" is missing, with no default
is_list()
## Error in is_list(): argument "x" is missing, with no default
is_vector()
## Error in is_vector(): argument "x" is missing, with no default
sample(10) + 100
## [1] 106 109 105 103 110 102 107 101 104 108
runif(10) > 0.5
## [1] TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE
1:10 + 1:2
## [1] 2 4 4 6 6 8 8 10 10 12
1:10 + 1:3
## Warning in 1:10 + 1:3: longer object length is not a multiple of shorter
## object length
## [1] 2 4 6 5 7 9 8 10 12 11
tibble(x = 1:4,
y = 1:2)
## Tibble columns must have consistent lengths, only values of length one are recycled:
## * Length 2: Column `y`
## * Length 4: Column `x`
tibble(x = 1:4,
y = rep(1:2,2))
## # A tibble: 4 x 2
## x y
## <int> <int>
## 1 1 1
## 2 2 2
## 3 3 1
## 4 4 2
tibble(x = 1:4,
y = rep(1:2, each = 2))
## # A tibble: 4 x 2
## x y
## <int> <int>
## 1 1 1
## 2 2 1
## 3 3 2
## 4 4 2
c(x = 1, y = 2, z = 4)
## x y z
## 1 2 4
set_names(1:3, c("a","b","c"))
## a b c
## 1 2 3
x <- c("one","two","three","four","five")
x[c(3,2,5)]
## [1] "three" "two" "five"
x[c(1,1,5,5,2)]
## [1] "one" "one" "five" "five" "two"
x[c(-1, -3, -5)]
## [1] "two" "four"
x[c(1,-1)]
## Error in x[c(1, -1)]: only 0's may be mixed with negative subscripts
x[0]
## character(0)
x <- c(10,3,NA,5,8,1,NA)
x[!is.na(x)]
## [1] 10 3 5 8 1
x[x %% 2 == 0]
## [1] 10 NA 8 NA
x <- c(abc = 1, def = 2, xyz = 5)
x[c("xyz", "def")]
## xyz def
## 5 2
x <- list(1,2,3)
x
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
str(x)
## List of 3
## $ : num 1
## $ : num 2
## $ : num 3
x_names <- list(a = 1, b = 2, c = 3)
str(x_named)
## Error in str(x_named): object 'x_named' not found
y <- list("a",1L,1.5,TRUE)
str(y)
## List of 4
## $ : chr "a"
## $ : int 1
## $ : num 1.5
## $ : logi TRUE
z <- list(list(1,2),list(3,4))
str(z)
## List of 2
## $ :List of 2
## ..$ : num 1
## ..$ : num 2
## $ :List of 2
## ..$ : num 3
## ..$ : num 4
x1 <- list(c(1,2),c(3,4))
x2 <- list(list(1,2),list(3,4))
x3 <- list(1,list(2,list(3)))
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
str(a[1:2])
## List of 2
## $ a: int [1:3] 1 2 3
## $ b: chr "a string"
str(a[4])
## List of 1
## $ d:List of 2
## ..$ : num -1
## ..$ : num -5
str(y[[1]])
## chr "a"
str(y[[4]])
## logi TRUE
a$a
## [1] 1 2 3
a[["a"]]
## [1] 1 2 3
x <- 1:10
attr(x, "greeting")
## NULL
attr(x, "greeting")<-"Hi!"
attr(x, "farewell") <- "Bye!"
attributes(x)
## $greeting
## [1] "Hi!"
##
## $farewell
## [1] "Bye!"
str(x)
## int [1:10] 1 2 3 4 5 6 7 8 9 10
## - attr(*, "greeting")= chr "Hi!"
## - attr(*, "farewell")= chr "Bye!"
as.Date
## function (x, ...)
## UseMethod("as.Date")
## <bytecode: 0x7ff4b3f5aa28>
## <environment: namespace:base>
methods("as.Date")
## [1] as.Date.character as.Date.default as.Date.factor as.Date.numeric
## [5] as.Date.POSIXct as.Date.POSIXlt
## see '?methods' for accessing help and source code
getS3method("as.Date","default")
## function (x, ...)
## {
## if (inherits(x, "Date"))
## x
## else if (is.logical(x) && all(is.na(x)))
## .Date(as.numeric(x))
## else stop(gettextf("do not know how to convert '%s' to class %s",
## deparse(substitute(x)), dQuote("Date")), domain = NA)
## }
## <bytecode: 0x7ff4b6bd8118>
## <environment: namespace:base>
getS3method("as.Date","numeric")
## function (x, origin, ...)
## {
## if (missing(origin))
## stop("'origin' must be supplied")
## as.Date(origin, ...) + x
## }
## <bytecode: 0x7ff4b6bda118>
## <environment: namespace:base>
x <- factor(c("ab","cd","ab"), levels = c("ab","cd","ef"))
typeof(x)
## [1] "integer"
attributes(x)
## $levels
## [1] "ab" "cd" "ef"
##
## $class
## [1] "factor"
x <- as.Date("1971-01-01")
unclass(x)
## [1] 365
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "Date"
x <- lubridate::ymd_hm("1970-01-01 01:00")
unclass(x)
## [1] 3600
## attr(,"tzone")
## [1] "UTC"
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "POSIXct" "POSIXt"
##
## $tzone
## [1] "UTC"
attr(x, "tzone") <- "US/Pacific"
x
## [1] "1969-12-31 17:00:00 PST"
attr(x, "tzone") <- "US/Eastern"
x
## [1] "1969-12-31 20:00:00 EST"
y <- as.POSIXct(x)
typeof(y)
## [1] "double"
attributes(y)
## $class
## [1] "POSIXct" "POSIXt"
##
## $tzone
## [1] "US/Eastern"
tb <- tibble::tibble(x = 1:5, y = 5:1)
typeof(tb)
## [1] "list"
attributes(tb)
## $names
## [1] "x" "y"
##
## $row.names
## [1] 1 2 3 4 5
##
## $class
## [1] "tbl_df" "tbl" "data.frame"
df <- data.frame(x = 1:5, y = 5:1)
typeof(df)
## [1] "list"
attributes(df)
## $names
## [1] "x" "y"
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] 1 2 3 4 5
library(tidyverse)
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
df
## # A tibble: 10 x 4
## a b c d
## <dbl> <dbl> <dbl> <dbl>
## 1 0.719 -1.09 -2.19 -0.283
## 2 -1.97 0.339 1.85 0.00335
## 3 -0.665 1.27 1.07 0.365
## 4 -0.0911 0.293 1.00 -0.0918
## 5 -0.251 -1.79 0.532 -1.43
## 6 0.803 1.66 -0.673 -0.972
## 7 1.02 -0.806 0.835 -0.148
## 8 -0.574 -0.596 -0.248 -2.41
## 9 0.508 0.187 -2.09 0.392
## 10 -0.892 -1.07 1.83 -0.0432
median(df$a)
## [1] -0.1712479
median(df$b)
## [1] -0.2043996
median(df$c)
## [1] 0.6833623
median(df$d)
## [1] -0.1200563
output <- vector("double",ncol(df)) # 1. output
for (i in seq_along(df)){ # 2. sequence
output[[i]] <- median(df[[i]]) # 3. body
}
output
## [1] -0.1712479 -0.2043996 0.6833623 -0.1200563
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
rescale01 <- function(x){
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
}
df$a <- rescale01(df$a)
df$b <- rescale01(df$b)
df$c <- rescale01(df$c)
df$d <- rescale01(df$d)
for (i in seq_along(df)) {
df[[i]] <- rescale01(df[[i]])
}
for (i in seq_along(x)){
name <- names(x)[[i]]
value <- x[[i]]
}
means <- c(0,1,2)
output <- double()
for (i in seq_along(means)){
n <- sample(100,1)
output <- c(output, rnorm(n, means[[i]]))
}
str(output)
## num [1:99] 0.395 -0.583 -1.552 -1.174 -0.559 ...
out <- vector("list", length(means))
for (i in seq_along(means)){
n <- sample(100,1)
out[[i]] <- rnorm(n,means[[i]])
}
str(out)
## List of 3
## $ : num [1:31] 1.4431 0.2276 -0.0045 0.6027 0.2299 ...
## $ : num [1:12] 0.1253 0.6488 1.304 -0.0971 0.7728 ...
## $ : num [1:56] 3.295 0.907 1.674 0.627 2.116 ...
str(unlist(out))
## num [1:99] 1.4431 0.2276 -0.0045 0.6027 0.2299 ...
while(condition){
# body
}
## Error in eval(expr, envir, enclos): object 'condition' not found
for (i in seq_along(x)){
# body
}
# Equivalent to
i <- 1
while(i <= length(x)){
# body
i <- i + 1
}
flip <- function() sample(c("T","H"),1)
flips <- 0
nheads <- 0
while(nheads < 3) {
if(flip() == "H"){
nheads <- nheads + 1
}else{
nheads <- 0
}
flips <- flips + 1
}
flips
## [1] 4
df <- tibble(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
output <- vector("double", length(df))
for (i in seq_along(df)) {
output[[i]] <- mean(df[[i]])
}
output
## [1] -0.0675577 0.1918486 -0.2706198 0.2639539
col_mean <- function(df){
output <- vector("double",length(df))
for (i in seq_along(df)){
output[i] <- mean(df[[i]])
}
output
}
col_median <- function(df) {
output <- vector("double", length(df))
for (i in seq_along(df)) {
output[i] <- median(df[[i]])
}
output
}
col_sd <- function(df) {
output <- vector("double", length(df))
for (i in seq_along(df)) {
output[i] <- sd(df[[i]])
}
output
}
f1 <- function(x) abs(x - mean(x)) ^ 1
f2 <- function(x) abs(x - mean(x)) ^ 2
f3 <- function(x) abs(x - mean(x)) ^ 3
f <- function(x, i) abs(x - mean(x)) ^ i
col_summary <- function(df, fun){
out <- vector("double", length(df))
for(i in seq_along(df)){
out[i] <- fun(df[[i]])
}
out
}
col_summary(df, median)
## [1] -0.2387807 0.1880906 -0.1266958 -0.1073063
col_summary(df, mean)
## [1] -0.0675577 0.1918486 -0.2706198 0.2639539
map()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_lgl()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_int()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_dbl()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_chr()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_dbl(df, mean)
## a b c d
## -0.0675577 0.1918486 -0.2706198 0.2639539
map_dbl(df, median)
## a b c d
## -0.2387807 0.1880906 -0.1266958 -0.1073063
map_dbl(df, sd)
## a b c d
## 0.7626600 0.8737739 0.9896946 0.8868068
df %>% map_dbl(mean)
## a b c d
## -0.0675577 0.1918486 -0.2706198 0.2639539
df %>% map_dbl(median)
## a b c d
## -0.2387807 0.1880906 -0.1266958 -0.1073063
df %>% map_dbl(sd)
## a b c d
## 0.7626600 0.8737739 0.9896946 0.8868068
map_dbl(df, mean, trim = 0.5)
## a b c d
## -0.2387807 0.1880906 -0.1266958 -0.1073063
z <- list(x = 1:3, y = 4:5)
map_int(z, length)
## x y
## 3 2
models <- mtcars %>%
split(.$cyl) %>%
map(function(df) lm(mpg~wt, data = df))
models
## $`16`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 39.571 -5.647
##
##
## $`24`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 28.41 -2.78
##
##
## $`32`
##
## Call:
## lm(formula = mpg ~ wt, data = df)
##
## Coefficients:
## (Intercept) wt
## 23.868 -2.192
models <- mtcars %>%
split(.$cyl) %>%
map(~lm(mpg~wt, data=.))
models
## $`16`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 39.571 -5.647
##
##
## $`24`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 28.41 -2.78
##
##
## $`32`
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 23.868 -2.192
models %>%
map(summary) %>%
map_dbl(~.$r.squared)
## 16 24 32
## 0.5086326 0.4645102 0.4229655
models %>%
map(summary) %>%
map_dbl("r.squared")
## 16 24 32
## 0.5086326 0.4645102 0.4229655
x <- list(list(1,2,3), list(4,5,6), list(7,8,9))
x %>% map_dbl(2)
## [1] 2 5 8
x1 <- list(
c(0.27, 0.37, 0.57, 0.91, 0.20),
c(0.90, 0.94, 0.66, 0.63, 0.06),
c(0.21, 0.18, 0.69, 0.38, 0.77)
)
x2 <- list(
c(0.50, 0.72, 0.99, 0.38, 0.78),
c(0.93, 0.21, 0.65, 0.13, 0.27),
c(0.39, 0.01, 0.38, 0.87, 0.34)
)
threshold <- function(x, cutoff = 0.8) x[x > cutoff]
x1 %>% sapply(threshold) %>% str()
## List of 3
## $ : num 0.91
## $ : num [1:2] 0.9 0.94
## $ : num(0)
#> List of 3
#> $ : num 0.91
#> $ : num [1:2] 0.9 0.94
#> $ : num(0)
x2 %>% sapply(threshold) %>% str()
## num [1:3] 0.99 0.93 0.87
#> num [1:3] 0.99 0.93 0.87
safe_log <- safely(log)
str(safe_log(10))
## List of 2
## $ result: num 2.3
## $ error : NULL
str(safe_log("a"))
## List of 2
## $ result: NULL
## $ error :List of 2
## ..$ message: chr "non-numeric argument to mathematical function"
## ..$ call : language .Primitive("log")(x, base)
## ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
x <- list(1,10,"a")
y <- x %>% map(safely(log))
str(y)
## List of 3
## $ :List of 2
## ..$ result: num 0
## ..$ error : NULL
## $ :List of 2
## ..$ result: num 2.3
## ..$ error : NULL
## $ :List of 2
## ..$ result: NULL
## ..$ error :List of 2
## .. ..$ message: chr "non-numeric argument to mathematical function"
## .. ..$ call : language .Primitive("log")(x, base)
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
y <- y %>% transpose()
str(y)
## List of 2
## $ result:List of 3
## ..$ : num 0
## ..$ : num 2.3
## ..$ : NULL
## $ error :List of 3
## ..$ : NULL
## ..$ : NULL
## ..$ :List of 2
## .. ..$ message: chr "non-numeric argument to mathematical function"
## .. ..$ call : language .Primitive("log")(x, base)
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
is_ok <- y$error %>% map_lgl(is_null)
x[!is_ok]
## [[1]]
## [1] "a"
y$result[is_ok] %>% flatten_dbl
## [1] 0.000000 2.302585
x <- list(1,10,"a")
x %>% map_dbl(possibly(log, NA_real_))
## [1] 0.000000 2.302585 NA
x <- list(1,-1)
x %>% map(quietly(log)) %>% str()
## List of 2
## $ :List of 4
## ..$ result : num 0
## ..$ output : chr ""
## ..$ warnings: chr(0)
## ..$ messages: chr(0)
## $ :List of 4
## ..$ result : num NaN
## ..$ output : chr ""
## ..$ warnings: chr "NaNs produced"
## ..$ messages: chr(0)
mu <- list(5,10,-3)
mu %>%
map(rnorm, n = 5) %>%
str()
## List of 3
## $ : num [1:5] 5.21 4.08 6.15 7.32 5.16
## $ : num [1:5] 8.51 10.13 9.78 11.09 11.39
## $ : num [1:5] -4.21 -2.91 -3.71 -3.66 -2.05
sigma <- list(1,5,10)
seq_along(mu) %>%
map(~rnorm(5,mu[[.]],sigma[[.]])) %>%
str()
## List of 3
## $ : num [1:5] 5.7 5.8 7.05 5.86 3.68
## $ : num [1:5] 10.39 9.37 16.63 11.44 8.8
## $ : num [1:5] -21.99 -7.82 -12.81 -1.1 5.86
map2(mu, sigma, rnorm, n = 5) %>% str()
## List of 3
## $ : num [1:5] 4.55 5.53 5.75 6.23 4.18
## $ : num [1:5] 6.57 13.65 15.94 17.59 21.55
## $ : num [1:5] -9.57 -5.38 19.52 -6.89 -11.84
mu %>%
map2(sigma, rnorm, n = 5) %>%
str()
## List of 3
## $ : num [1:5] 4.6 3.8 4.36 3.73 5.9
## $ : num [1:5] 15.19 15.183 0.137 2.734 13.938
## $ : num [1:5] 15.72 -8.55 -6.61 -20.83 -5.96
map2 <- function(x,y,f,...){
out <- vector("list",length(x))
for(i in seq_along(x)){
out[[i]]<-f(x[[i]],y[[i]],...)
}
out
}
n <- list(1,3,5)
args1 <- list(n, mu, sigma)
args1 %>%
pmap(rnorm) %>%
str()
## List of 3
## $ : num 5.82
## $ : num [1:3] 6.91 9.43 9.67
## $ : num [1:5] -3.95 -10.32 -8.87 -5.97 -3.42
args2 <- list(mean = mu, sd = sigma, n = n)
args2 %>%
pmap(rnorm) %>%
str()
## List of 3
## $ : num 4.47
## $ : num [1:3] 5.59 14.99 13.79
## $ : num [1:5] -15.99 -15.84 11.95 -1.27 4.96
params <- tribble(
~mean, ~sd, ~n,
5,1,1,
10, 5, 3,
-3, 10, 5
)
params %>%
pmap(rnorm)
## [[1]]
## [1] 4.271721
##
## [[2]]
## [1] 5.222806 13.321636 7.254072
##
## [[3]]
## [1] -5.17704 -11.02482 -19.16702 11.46873 13.57893
args(rnorm)
## function (n, mean = 0, sd = 1)
## NULL
f <- c("runif","rnorm","rpois")
param <- list(
list(min = -1, max = 1),
list(sd = 5),
list(lambda = 10)
)
invoke_map(f,param,n=5) %>% str()
## List of 3
## $ : num [1:5] 0.799 -0.0709 -0.6819 0.8252 0.9747
## $ : num [1:5] -2.81 7.53 -4.29 2.92 4.51
## $ : int [1:5] 4 10 8 17 17
sim <- tribble(
~f, ~params,
"runif", list(min = 1, max = 1),
"rnorm", list(sd = 5),
"rpois", list(lambda = 10)
)
sim %>% mutate(sim = invoke_map(f,params,n=10))
## # A tibble: 3 x 3
## f params sim
## <chr> <list> <list>
## 1 runif <list [2]> <dbl [10]>
## 2 rnorm <list [1]> <dbl [10]>
## 3 rpois <list [1]> <int [10]>
x <- list(1,"a",3)
x %>%
walk(print)
## [1] 1
## [1] "a"
## [1] 3
library(ggplot2)
plots <- mtcars %>%
split(.$cyl) %>%
map(~ggplot(.,aes(mpg,wt))+geom_point())
paths <- stringr::str_c(names(plots),".pdf")
pwalk(list(paths,plots),ggsave,path = tempdir())
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
iris %>%
keep(is.factor) %>%
str()
## 'data.frame': 150 obs. of 1 variable:
## $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
iris %>%
discard(is.factor) %>%
str()
## 'data.frame': 150 obs. of 4 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
x <- list(1:5, letters, list(10))
x %>%
some(is_character)
## [1] TRUE
x %>%
every(is_vector)
## [1] TRUE
x <- sample(10)
x
## [1] 2 4 5 3 9 1 7 6 8 10
x %>%
detect(~ .>5)
## [1] 9
x %>%
detect_index(~ .>5)
## [1] 5
x %>%
head_while(~. >5)
## integer(0)
x %>%
tail_while(~. >5)
## [1] 7 6 8 10
dfs <- list(
age = tibble(name = "John", age = 30),
sex = tibble(name = c("John", "Mary"), sex = c("M","F")),
trt = tibble(name = "Mary", treatment = "A")
)
dfs %>% reduce(full_join)
## Joining, by = "name"
## Joining, by = "name"
## # A tibble: 2 x 4
## name age sex treatment
## <chr> <dbl> <chr> <chr>
## 1 John 30 M <NA>
## 2 Mary NA F A
vs <- list(
c(1,3,5,6,10),
c(1,2,3,7,8,10),
c(1,2,3,4,8,9,10)
)
vs %>% reduce(intersect)
## [1] 1 3 10
x <- sample(10)
x
## [1] 8 9 2 10 3 1 5 6 7 4
x %>% accumulate(`+`)
## [1] 8 17 19 29 32 33 38 44 51 55
x %>% reduce(`+`)
## [1] 55
library(tidyverse)
library(modelr)
options(na.action = na.warn)
ggplot(sim1, aes(x,y)) + geom_point()
models <- tibble(
a1 = runif(250, -20, 40),
a2 = runif(250,-5,5)
)
ggplot(sim1, aes(x,y)) +
geom_abline(
aes(intercept = a1, slope = a2),
data = models, alpha = 1/4
) +
geom_point()
model1 <- function(a, data){
a[1] + data$x * a[2]
}
model1(c(7,1.5), sim1)
## [1] 8.5 8.5 8.5 10.0 10.0 10.0 11.5 11.5 11.5 13.0 13.0 13.0 14.5 14.5
## [15] 14.5 16.0 16.0 16.0 17.5 17.5 17.5 19.0 19.0 19.0 20.5 20.5 20.5 22.0
## [29] 22.0 22.0
measure_distance <- function(mod, data){
diff <- data$y - model1(mod, data)
sqrt(mean(diff^2))
}
measure_distance(c(7,1.5),sim1)
## [1] 2.665212
sim1_dist <- function(a1,a2){
measure_distance(c(a1,a2),sim1)
}
models <- models %>%
mutate(dist = purrr::map2_dbl(a1,a2,sim1_dist))
models
## # A tibble: 250 x 3
## a1 a2 dist
## <dbl> <dbl> <dbl>
## 1 -11.8 3.28 10.1
## 2 0.983 2.19 3.27
## 3 31.0 -4.11 19.2
## 4 22.1 2.53 20.6
## 5 34.1 1.12 25.0
## 6 16.0 4.80 28.1
## 7 -12.2 1.62 18.9
## 8 -12.1 -4.13 53.4
## 9 -15.7 0.822 27.0
## 10 16.3 -1.57 13.2
## # … with 240 more rows
ggplot(sim1, aes(x,y))+
geom_point(size = 2, color = "grey30") +
geom_abline(
aes(intercept = a1, slope = a2, color = -dist),
data = filter(models, rank(dist)<=10)
)
ggplot(models, aes(a1,a2))+
geom_point(
data = filter(models, rank(dist) <= 10),
size = 4, color = "red"
)+
geom_point(aes(colour = -dist))
grid <- expand.grid(
a1 = seq(-5,20,length = 25),
a2 = seq(1,3,length = 25)
) %>%
mutate(dist = purrr::map2_dbl(a1,a2,sim1_dist))
grid
## a1 a2 dist
## 1 -5.0000000 1.000000 15.452475
## 2 -3.9583333 1.000000 14.443171
## 3 -2.9166667 1.000000 13.438807
## 4 -1.8750000 1.000000 12.440580
## 5 -0.8333333 1.000000 11.450094
## 6 0.2083333 1.000000 10.469547
## 7 1.2500000 1.000000 9.502017
## 8 2.2916667 1.000000 8.551921
## 9 3.3333333 1.000000 7.625781
## 10 4.3750000 1.000000 6.733488
## 11 5.4166667 1.000000 5.890443
## 12 6.4583333 1.000000 5.121026
## 13 7.5000000 1.000000 4.463479
## 14 8.5416667 1.000000 3.973729
## 15 9.5833333 1.000000 3.718673
## 16 10.6250000 1.000000 3.746556
## 17 11.6666667 1.000000 4.051540
## 18 12.7083333 1.000000 4.578581
## 19 13.7500000 1.000000 5.261366
## 20 14.7916667 1.000000 6.047370
## 21 15.8333333 1.000000 6.901415
## 22 16.8750000 1.000000 7.801187
## 23 17.9166667 1.000000 8.732562
## 24 18.9583333 1.000000 9.686429
## 25 20.0000000 1.000000 10.656749
## 26 -5.0000000 1.083333 14.961504
## 27 -3.9583333 1.083333 13.950902
## 28 -2.9166667 1.083333 12.945226
## 29 -1.8750000 1.083333 11.945720
## 30 -0.8333333 1.083333 10.954072
## 31 0.2083333 1.083333 9.972629
## 32 1.2500000 1.083333 9.004726
## 33 2.2916667 1.083333 8.055246
## 34 3.3333333 1.083333 7.131552
## 35 4.3750000 1.083333 6.245095
## 36 5.4166667 1.083333 5.414197
## 37 6.4583333 1.083333 4.668617
## 38 7.5000000 1.083333 4.055685
## 39 8.5416667 1.083333 3.642982
## 40 9.5833333 1.083333 3.502027
## 41 10.6250000 1.083333 3.664315
## 42 11.6666667 1.083333 4.093941
## 43 12.7083333 1.083333 4.718437
## 44 13.7500000 1.083333 5.471478
## 45 14.7916667 1.083333 6.307190
## 46 15.8333333 1.083333 7.196829
## 47 16.8750000 1.083333 8.122696
## 48 17.9166667 1.083333 9.073708
## 49 18.9583333 1.083333 10.042724
## 50 20.0000000 1.083333 11.024998
## 51 -5.0000000 1.166667 14.472350
## 52 -3.9583333 1.166667 13.460492
## 53 -2.9166667 1.166667 12.453550
## 54 -1.8750000 1.166667 11.452822
## 55 -0.8333333 1.166667 10.460090
## 56 0.2083333 1.166667 9.477867
## 57 1.2500000 1.166667 8.509793
## 58 2.2916667 1.166667 7.561306
## 59 3.3333333 1.166667 6.640802
## 60 4.3750000 1.166667 5.761709
## 61 5.4166667 1.166667 4.946157
## 62 6.4583333 1.166667 4.231050
## 63 7.5000000 1.166667 3.675492
## 64 8.5416667 1.166667 3.359589
## 65 9.5833333 1.166667 3.351801
## 66 10.6250000 1.166667 3.654100
## 67 11.6666667 1.166667 4.200055
## 68 12.7083333 1.166667 4.909034
## 69 13.7500000 1.166667 5.720743
## 70 14.7916667 1.166667 6.597373
## 71 15.8333333 1.166667 7.516242
## 72 16.8750000 1.166667 8.463605
## 73 17.9166667 1.166667 9.430878
## 74 18.9583333 1.166667 10.412514
## 75 20.0000000 1.166667 11.404804
## 76 -5.0000000 1.250000 13.985205
## 77 -3.9583333 1.250000 12.972153
## 78 -2.9166667 1.250000 11.964016
## 79 -1.8750000 1.250000 10.962151
## 80 -0.8333333 1.250000 9.968448
## 81 0.2083333 1.250000 8.985617
## 82 1.2500000 1.250000 8.017655
## 83 2.2916667 1.250000 7.070673
## 84 3.3333333 1.250000 6.154363
## 85 4.3750000 1.250000 5.284703
## 86 5.4166667 1.250000 4.488889
## 87 6.4583333 1.250000 3.813437
## 88 7.5000000 1.250000 3.332360
## 89 8.5416667 1.250000 3.136412
## 90 9.5833333 1.250000 3.277145
## 91 10.6250000 1.250000 3.716505
## 92 11.6666667 1.250000 4.365236
## 93 12.7083333 1.250000 5.144735
## 94 13.7500000 1.250000 6.004286
## 95 14.7916667 1.250000 6.914097
## 96 15.8333333 1.250000 7.856728
## 97 16.8750000 1.250000 8.821663
## 98 17.9166667 1.250000 9.802318
## 99 18.9583333 1.250000 10.794410
## 100 20.0000000 1.250000 11.795053
## 101 -5.0000000 1.333333 13.500287
## 102 -3.9583333 1.333333 12.486128
## 103 -2.9166667 1.333333 11.476898
## 104 -1.8750000 1.333333 10.474021
## 105 -0.8333333 1.333333 9.479514
## 106 0.2083333 1.333333 8.496316
## 107 1.2500000 1.333333 7.528860
## 108 2.2916667 1.333333 6.584088
## 109 3.3333333 1.333333 5.673345
## 110 4.3750000 1.333333 4.815974
## 111 5.4166667 1.333333 4.046048
## 112 6.4583333 1.333333 3.423090
## 113 7.5000000 1.333333 3.038869
## 114 8.5416667 1.333333 2.986979
## 115 9.5833333 1.333333 3.283215
## 116 10.6250000 1.333333 3.847999
## 117 11.6666667 1.333333 4.583103
## 118 12.7083333 1.333333 5.419659
## 119 13.7500000 1.333333 6.317493
## 120 14.7916667 1.333333 7.253887
## 121 15.8333333 1.333333 8.215666
## 122 16.8750000 1.333333 9.194868
## 123 17.9166667 1.333333 10.186470
## 124 18.9583333 1.333333 11.187174
## 125 20.0000000 1.333333 12.194741
## 126 -5.0000000 1.416667 13.017843
## 127 -3.9583333 1.416667 12.002697
## 128 -2.9166667 1.416667 10.992516
## 129 -1.8750000 1.416667 9.988803
## 130 -0.8333333 1.416667 8.993727
## 131 0.2083333 1.416667 8.010505
## 132 1.2500000 1.416667 7.044103
## 133 2.2916667 1.416667 6.102519
## 134 3.3333333 1.416667 5.199252
## 135 4.3750000 1.416667 4.358193
## 136 5.4166667 1.416667 3.622929
## 137 6.4583333 1.416667 3.070425
## 138 7.5000000 1.416667 2.810614
## 139 8.5416667 1.416667 2.922624
## 140 9.5833333 1.416667 3.369577
## 141 10.6250000 1.416667 4.041845
## 142 11.6666667 1.416667 4.846556
## 143 12.7083333 1.416667 5.728162
## 144 13.7500000 1.416667 6.656179
## 145 14.7916667 1.416667 7.613654
## 146 15.8333333 1.416667 8.590744
## 147 16.8750000 1.416667 9.581449
## 148 17.9166667 1.416667 10.581947
## 149 18.9583333 1.416667 11.589701
## 150 20.0000000 1.416667 12.602971
## 151 -5.0000000 1.500000 12.538160
## 152 -3.9583333 1.500000 11.522188
## 153 -2.9166667 1.500000 10.511248
## 154 -1.8750000 1.500000 9.506944
## 155 -0.8333333 1.500000 8.511626
## 156 0.2083333 1.500000 7.528858
## 157 1.2500000 1.500000 6.564280
## 158 2.2916667 1.500000 5.627253
## 159 3.3333333 1.500000 4.734166
## 160 4.3750000 1.500000 3.915203
## 161 5.4166667 1.500000 3.227296
## 162 6.4583333 1.500000 2.769874
## 163 7.5000000 1.500000 2.664414
## 164 8.5416667 1.500000 2.948922
## 165 9.5833333 1.500000 3.530343
## 166 10.6250000 1.500000 4.289597
## 167 11.6666667 1.500000 5.148601
## 168 12.7083333 1.500000 6.065121
## 169 13.7500000 1.500000 7.016654
## 170 14.7916667 1.500000 7.990701
## 171 15.8333333 1.500000 8.979940
## 172 16.8750000 1.500000 9.979853
## 173 17.9166667 1.500000 10.987527
## 174 18.9583333 1.500000 12.001008
## 175 20.0000000 1.500000 13.018938
## 176 -5.0000000 1.583333 12.061566
## 177 -3.9583333 1.583333 11.044982
## 178 -2.9166667 1.583333 10.033543
## 179 -1.8750000 1.583333 9.028981
## 180 -0.8333333 1.583333 8.033876
## 181 0.2083333 1.583333 7.052230
## 182 1.2500000 1.583333 6.090556
## 183 2.2916667 1.583333 5.160034
## 184 3.3333333 1.583333 4.281023
## 185 4.3750000 1.583333 3.492635
## 186 5.4166667 1.583333 2.870537
## 187 6.4583333 1.583333 2.540002
## 188 7.5000000 1.583333 2.614072
## 189 8.5416667 1.583333 3.063539
## 190 9.5833333 1.583333 3.755970
## 191 10.6250000 1.583333 4.582520
## 192 11.6666667 1.583333 5.482865
## 193 12.7083333 1.583333 6.426062
## 194 13.7500000 1.583333 7.395733
## 195 14.7916667 1.583333 8.382697
## 196 15.8333333 1.583333 9.381496
## 197 16.8750000 1.583333 10.388719
## 198 17.9166667 1.583333 11.402133
## 199 18.9583333 1.583333 12.420223
## 200 20.0000000 1.583333 13.441925
## 201 -5.0000000 1.666667 11.588444
## 202 -3.9583333 1.666667 10.571525
## 203 -2.9166667 1.666667 9.559936
## 204 -1.8750000 1.666667 8.555568
## 205 -0.8333333 1.666667 7.561300
## 206 0.2083333 1.666667 6.581711
## 207 1.2500000 1.666667 5.624474
## 208 2.2916667 1.666667 4.703258
## 209 3.3333333 1.666667 3.844048
## 210 4.3750000 1.666667 3.098856
## 211 5.4166667 1.666667 2.568902
## 212 6.4583333 1.666667 2.401196
## 213 7.5000000 1.666667 2.665026
## 214 8.5416667 1.666667 3.257166
## 215 9.5833333 1.666667 4.035595
## 216 10.6250000 1.666667 4.912542
## 217 11.6666667 1.666667 5.843821
## 218 12.7083333 1.666667 6.807170
## 219 13.7500000 1.666667 7.790701
## 220 14.7916667 1.666667 8.787640
## 221 15.8333333 1.666667 9.793894
## 222 16.8750000 1.666667 10.806860
## 223 17.9166667 1.666667 11.824815
## 224 18.9583333 1.666667 12.846571
## 225 20.0000000 1.666667 13.871290
## 226 -5.0000000 1.750000 11.119237
## 227 -3.9583333 1.750000 10.102345
## 228 -2.9166667 1.750000 9.091066
## 229 -1.8750000 1.750000 8.087504
## 230 -0.8333333 1.750000 7.094934
## 231 0.2083333 1.750000 6.118709
## 232 1.2500000 1.750000 5.168100
## 233 2.2916667 1.750000 4.260287
## 234 3.3333333 1.750000 3.429427
## 235 4.3750000 1.750000 2.746278
## 236 5.4166667 1.750000 2.343768
## 237 6.4583333 1.750000 2.369514
## 238 7.5000000 1.750000 2.811775
## 239 8.5416667 1.750000 3.516775
## 240 9.5833333 1.750000 4.358838
## 241 10.6250000 1.750000 5.272700
## 242 11.6666667 1.750000 6.226830
## 243 12.7083333 1.750000 7.205247
## 244 13.7500000 1.750000 8.199263
## 245 14.7916667 1.750000 9.203823
## 246 15.8333333 1.750000 10.215819
## 247 16.8750000 1.750000 11.233241
## 248 17.9166667 1.750000 12.254737
## 249 18.9583333 1.750000 13.279367
## 250 20.0000000 1.750000 14.306458
## 251 -5.0000000 1.833333 10.654461
## 252 -3.9583333 1.833333 9.638068
## 253 -2.9166667 1.833333 8.627706
## 254 -1.8750000 1.833333 7.625772
## 255 -0.8333333 1.833333 6.636086
## 256 0.2083333 1.833333 5.665069
## 257 1.2500000 1.833333 4.724248
## 258 2.2916667 1.833333 3.835906
## 259 3.3333333 1.833333 3.046304
## 260 4.3750000 1.833333 2.452732
## 261 5.4166667 1.833333 2.218550
## 262 6.4583333 1.833333 2.449116
## 263 7.5000000 1.833333 3.040480
## 264 8.5416667 1.833333 3.828969
## 265 9.5833333 1.833333 4.716739
## 266 10.6250000 1.833333 5.657242
## 267 11.6666667 1.833333 6.628068
## 268 12.7083333 1.833333 7.617633
## 269 13.7500000 1.833333 8.619484
## 270 14.7916667 1.833333 9.629789
## 271 15.8333333 1.833333 10.646139
## 272 16.8750000 1.833333 11.666957
## 273 17.9166667 1.833333 12.691163
## 274 18.9583333 1.833333 13.717999
## 275 20.0000000 1.833333 14.746915
## 276 -5.0000000 1.916667 10.194722
## 277 -3.9583333 1.916667 9.179435
## 278 -2.9166667 1.916667 8.170793
## 279 -1.8750000 1.916667 7.171597
## 280 -0.8333333 1.916667 6.186429
## 281 0.2083333 1.916667 5.223231
## 282 1.2500000 1.916667 4.296803
## 283 2.2916667 1.916667 3.437009
## 284 3.3333333 1.916667 2.708077
## 285 4.3750000 1.916667 2.241533
## 286 5.4166667 1.916667 2.210294
## 287 6.4583333 1.916667 2.629918
## 288 7.5000000 1.916667 3.334318
## 289 8.5416667 1.916667 4.181988
## 290 9.5833333 1.916667 5.102010
## 291 10.6250000 1.916667 6.061529
## 292 11.6666667 1.916667 7.044423
## 293 12.7083333 1.916667 8.042126
## 294 13.7500000 1.916667 9.049742
## 295 14.7916667 1.916667 10.064294
## 296 15.8333333 1.916667 11.083877
## 297 16.8750000 1.916667 12.107221
## 298 17.9166667 1.916667 13.133445
## 299 18.9583333 1.916667 14.161925
## 300 20.0000000 1.916667 15.192202
## 301 -5.0000000 2.000000 9.740734
## 302 -3.9583333 2.000000 8.727339
## 303 -2.9166667 2.000000 7.721472
## 304 -1.8750000 2.000000 6.726510
## 305 -0.8333333 2.000000 5.748121
## 306 0.2083333 2.000000 4.796457
## 307 1.2500000 2.000000 3.891173
## 308 2.2916667 2.000000 3.073533
## 309 3.3333333 2.000000 2.433540
## 310 4.3750000 2.000000 2.137234
## 311 5.4166667 2.000000 2.320250
## 312 6.4583333 2.000000 2.893007
## 313 7.5000000 2.000000 3.677711
## 314 8.5416667 2.000000 4.566373
## 315 9.5833333 2.000000 5.508912
## 316 10.6250000 2.000000 6.481867
## 317 11.6666667 2.000000 7.473367
## 318 12.7083333 2.000000 8.476909
## 319 13.7500000 2.000000 9.488671
## 320 14.7916667 2.000000 10.506280
## 321 15.8333333 2.000000 11.528187
## 322 16.8750000 2.000000 12.553343
## 323 17.9166667 2.000000 13.581012
## 324 18.9583333 2.000000 14.610663
## 325 20.0000000 2.000000 15.641906
## 326 -5.0000000 2.083333 9.293340
## 327 -3.9583333 2.083333 8.282848
## 328 -2.9166667 2.083333 7.281148
## 329 -1.8750000 2.083333 6.292440
## 330 -0.8333333 2.083333 5.323966
## 331 0.2083333 2.083333 4.389142
## 332 1.2500000 2.083333 3.514921
## 333 2.2916667 2.083333 2.759511
## 334 3.3333333 2.083333 2.246169
## 335 4.3750000 2.083333 2.155409
## 336 5.4166667 2.083333 2.533070
## 337 6.4583333 2.083333 3.218265
## 338 7.5000000 2.083333 4.058098
## 339 8.5416667 2.083333 4.974860
## 340 9.5833333 2.083333 5.932996
## 341 10.6250000 2.083333 6.915330
## 342 11.6666667 2.083333 7.912855
## 343 12.7083333 2.083333 8.920476
## 344 13.7500000 2.083333 9.935122
## 345 14.7916667 2.083333 10.954842
## 346 15.8333333 2.083333 11.978339
## 347 16.8750000 2.083333 13.004721
## 348 17.9166667 2.083333 14.033357
## 349 18.9583333 2.083333 15.063783
## 350 20.0000000 2.083333 16.095656
## 351 -5.0000000 2.166667 8.853540
## 352 -3.9583333 2.166667 7.847256
## 353 -2.9166667 2.166667 6.851557
## 354 -1.8750000 2.166667 5.871829
## 355 -0.8333333 2.166667 4.917627
## 356 0.2083333 2.166667 4.007227
## 357 1.2500000 2.166667 3.178494
## 358 2.2916667 2.166667 2.513548
## 359 3.3333333 2.166667 2.168677
## 360 4.3750000 2.166667 2.293149
## 361 5.4166667 2.166667 2.825605
## 362 6.4583333 2.166667 3.588829
## 363 7.5000000 2.166667 4.466037
## 364 8.5416667 2.166667 5.401983
## 365 9.5833333 2.166667 6.370831
## 366 10.6250000 2.166667 7.359599
## 367 11.6666667 2.166667 8.361222
## 368 12.7083333 2.166667 9.371581
## 369 13.7500000 2.166667 10.388125
## 370 14.7916667 2.166667 11.409203
## 371 15.8333333 2.166667 12.433697
## 372 16.8750000 2.166667 13.460827
## 373 17.9166667 2.166667 14.490032
## 374 18.9583333 2.166667 15.520900
## 375 20.0000000 2.166667 16.553121
## 376 -5.0000000 2.250000 8.422522
## 377 -3.9583333 2.250000 7.422129
## 378 -2.9166667 2.250000 6.434848
## 379 -1.8750000 2.250000 5.467785
## 380 -0.8333333 2.250000 4.533896
## 381 0.2083333 2.250000 3.658673
## 382 1.2500000 2.250000 2.895809
## 383 2.2916667 2.250000 2.357046
## 384 3.3333333 2.250000 2.212637
## 385 4.3750000 2.250000 2.531007
## 386 5.4166667 2.250000 3.175905
## 387 6.4583333 2.250000 3.992103
## 388 7.5000000 2.250000 4.894644
## 389 8.5416667 2.250000 5.843657
## 390 9.5833333 2.250000 6.819769
## 391 10.6250000 2.250000 7.812831
## 392 11.6666667 2.250000 8.817116
## 393 12.7083333 2.250000 9.829185
## 394 13.7500000 2.250000 10.846860
## 395 14.7916667 2.250000 11.868698
## 396 15.8333333 2.250000 12.893710
## 397 16.8750000 2.250000 13.921194
## 398 17.9166667 2.250000 14.950642
## 399 18.9583333 2.250000 15.981673
## 400 20.0000000 2.250000 17.014000
## 401 -5.0000000 2.333333 8.001707
## 402 -3.9583333 2.333333 7.009373
## 403 -2.9166667 2.333333 6.033691
## 404 -1.8750000 2.333333 5.084259
## 405 -0.8333333 2.333333 4.179006
## 406 0.2083333 2.333333 3.353898
## 407 1.2500000 2.333333 2.683899
## 408 2.2916667 2.333333 2.308274
## 409 3.3333333 2.333333 2.371305
## 410 4.3750000 2.333333 2.843973
## 411 5.4166667 2.333333 3.566990
## 412 6.4583333 2.333333 4.419139
## 413 7.5000000 2.333333 5.338942
## 414 8.5416667 2.333333 6.296821
## 415 9.5833333 2.333333 7.277757
## 416 10.6250000 2.333333 8.273553
## 417 11.6666667 2.333333 9.279426
## 418 12.7083333 2.333333 10.292422
## 419 13.7500000 2.333333 11.310628
## 420 14.7916667 2.333333 12.332753
## 421 15.8333333 2.333333 13.357897
## 422 16.8750000 2.333333 14.385415
## 423 17.9166667 2.333333 15.414833
## 424 18.9583333 2.333333 16.445793
## 425 20.0000000 2.333333 17.478023
## 426 -5.0000000 2.416667 7.592791
## 427 -3.9583333 2.416667 6.611303
## 428 -2.9166667 2.416667 5.651399
## 429 -1.8750000 2.416667 4.726249
## 430 -0.8333333 2.416667 3.860919
## 431 0.2083333 2.416667 3.105817
## 432 1.2500000 2.416667 2.560398
## 433 2.2916667 2.416667 2.373882
## 434 3.3333333 2.416667 2.623954
## 435 4.3750000 2.416667 3.210155
## 436 5.4166667 2.416667 3.986877
## 437 6.4583333 2.416667 4.863684
## 438 7.5000000 2.416667 5.795326
## 439 8.5416667 2.416667 6.759165
## 440 9.5833333 2.416667 7.743188
## 441 10.6250000 2.416667 8.740581
## 442 11.6666667 2.416667 9.747240
## 443 12.7083333 2.416667 10.760565
## 444 13.7500000 2.416667 11.778835
## 445 14.7916667 2.416667 12.800871
## 446 15.8333333 2.416667 13.825838
## 447 16.8750000 2.416667 14.853128
## 448 17.9166667 2.416667 15.882291
## 449 18.9583333 2.416667 16.912985
## 450 20.0000000 2.416667 17.944947
## 451 -5.0000000 2.500000 7.197802
## 452 -3.9583333 2.500000 6.230736
## 453 -2.9166667 2.500000 5.292061
## 454 -1.8750000 2.500000 4.399988
## 455 -0.8333333 2.500000 3.589432
## 456 0.2083333 2.500000 2.928871
## 457 1.2500000 2.500000 2.538245
## 458 2.2916667 2.500000 2.545040
## 459 3.3333333 2.500000 2.946508
## 460 4.3750000 2.500000 3.613409
## 461 5.4166667 2.500000 4.427379
## 462 6.4583333 2.500000 5.321351
## 463 7.5000000 2.500000 6.261151
## 464 8.5416667 2.500000 7.228927
## 465 9.5833333 2.500000 8.214798
## 466 10.6250000 2.500000 9.212956
## 467 11.6666667 2.500000 10.219801
## 468 12.7083333 2.500000 11.232999
## 469 13.7500000 2.500000 12.250973
## 470 14.7916667 2.500000 13.272624
## 471 15.8333333 2.500000 14.297164
## 472 16.8750000 2.500000 15.324013
## 473 17.9166667 2.500000 16.352737
## 474 18.9583333 2.500000 17.383002
## 475 20.0000000 2.500000 18.414550
## 476 -5.0000000 2.583333 6.819161
## 477 -3.9583333 2.583333 5.871076
## 478 -2.9166667 2.583333 4.960669
## 479 -1.8750000 2.583333 4.113038
## 480 -0.8333333 2.583333 3.375807
## 481 0.2083333 2.583333 2.836405
## 482 1.2500000 2.583333 2.620011
## 483 2.2916667 2.583333 2.802474
## 484 3.3333333 2.583333 3.318644
## 485 4.3750000 2.583333 4.042657
## 486 5.4166667 2.583333 4.882919
## 487 6.4583333 2.583333 5.789029
## 488 7.5000000 2.583333 6.734460
## 489 8.5416667 2.583333 7.704751
## 490 9.5833333 2.583333 8.691580
## 491 10.6250000 2.583333 9.689895
## 492 11.6666667 2.583333 10.696482
## 493 12.7083333 2.583333 11.709206
## 494 13.7500000 2.583333 12.726604
## 495 14.7916667 2.583333 13.747637
## 496 15.8333333 2.583333 14.771551
## 497 16.8750000 2.583333 15.797787
## 498 17.9166667 2.583333 16.825919
## 499 18.9583333 2.583333 17.855620
## 500 20.0000000 2.583333 18.886634
## 501 -5.0000000 2.666667 6.459744
## 502 -3.9583333 2.666667 5.536399
## 503 -2.9166667 2.666667 4.663184
## 504 -1.8750000 2.666667 3.874144
## 505 -0.8333333 2.666667 3.231538
## 506 0.2083333 2.666667 2.836693
## 507 1.2500000 2.666667 2.796596
## 508 2.2916667 2.666667 3.124934
## 509 3.3333333 2.666667 3.725535
## 510 4.3750000 2.666667 4.490452
## 511 5.4166667 2.666667 5.349657
## 512 6.4583333 2.666667 6.264475
## 513 7.5000000 2.666667 7.213779
## 514 8.5416667 2.666667 8.185579
## 515 9.5833333 2.666667 9.172728
## 516 10.6250000 2.666667 10.170758
## 517 11.6666667 2.666667 11.176754
## 518 12.7083333 2.666667 12.188744
## 519 13.7500000 2.666667 13.205350
## 520 14.7916667 2.666667 14.225583
## 521 15.8333333 2.666667 15.248714
## 522 16.8750000 2.666667 16.274197
## 523 17.9166667 2.666667 17.301613
## 524 18.9583333 2.666667 18.330638
## 525 20.0000000 2.666667 19.361016
## 526 -5.0000000 2.750000 6.122935
## 527 -3.9583333 2.750000 5.231503
## 528 -2.9166667 2.750000 4.406479
## 529 -1.8750000 2.750000 3.692645
## 530 -0.8333333 2.750000 3.166123
## 531 0.2083333 2.750000 2.929706
## 532 1.2500000 2.750000 3.051584
## 533 2.2916667 2.750000 3.494465
## 534 3.3333333 2.750000 4.156988
## 535 4.3750000 2.750000 4.951763
## 536 5.4166667 2.750000 5.824903
## 537 6.4583333 2.750000 6.746049
## 538 7.5000000 2.750000 7.697986
## 539 8.5416667 2.750000 8.670579
## 540 9.5833333 2.750000 9.657590
## 541 10.6250000 2.750000 10.655012
## 542 11.6666667 2.750000 11.660174
## 543 12.7083333 2.750000 12.671234
## 544 13.7500000 2.750000 13.686885
## 545 14.7916667 2.750000 14.706176
## 546 15.8333333 2.750000 15.728399
## 547 16.8750000 2.750000 16.753018
## 548 17.9166667 2.750000 17.779618
## 549 18.9583333 2.750000 18.807875
## 550 20.0000000 2.750000 19.837531
## 551 -5.0000000 2.833333 5.812668
## 552 -3.9583333 2.833333 4.961881
## 553 -2.9166667 2.833333 4.198041
## 554 -1.8750000 2.833333 3.577287
## 555 -0.8333333 2.833333 3.184423
## 556 0.2083333 2.833333 3.107130
## 557 1.2500000 2.833333 3.367210
## 558 2.2916667 2.833333 3.897703
## 559 3.3333333 2.833333 4.606106
## 560 4.3750000 2.833333 5.423142
## 561 5.4166667 2.833333 6.306733
## 562 6.4583333 2.833333 7.232525
## 563 7.5000000 2.833333 8.186214
## 564 8.5416667 2.833333 9.159089
## 565 9.5833333 2.833333 10.145633
## 566 10.6250000 2.833333 11.142216
## 567 11.6666667 2.833333 12.146366
## 568 12.7083333 2.833333 13.156351
## 569 13.7500000 2.833333 14.170924
## 570 14.7916667 2.833333 15.189165
## 571 15.8333333 2.833333 16.210383
## 572 16.8750000 2.833333 17.234049
## 573 17.9166667 2.833333 18.259752
## 574 18.9583333 2.833333 19.287165
## 575 20.0000000 2.833333 20.316030
## 576 -5.0000000 2.916667 5.533408
## 577 -3.9583333 2.916667 4.733562
## 578 -2.9166667 2.916667 4.045339
## 579 -1.8750000 2.916667 3.534552
## 580 -0.8333333 2.916667 3.285040
## 581 0.2083333 2.916667 3.355600
## 582 1.2500000 2.916667 3.728104
## 583 2.2916667 2.916667 4.325229
## 584 3.3333333 2.916667 5.068194
## 585 4.3750000 2.916667 5.902179
## 586 5.4166667 2.916667 6.793746
## 587 6.4583333 2.916667 7.722977
## 588 7.5000000 2.916667 8.677783
## 589 8.5416667 2.916667 9.650575
## 590 9.5833333 2.916667 10.636419
## 591 10.6250000 2.916667 11.631998
## 592 11.6666667 2.916667 12.635010
## 593 12.7083333 2.916667 13.643816
## 594 13.7500000 2.916667 14.657219
## 595 14.7916667 2.916667 15.674329
## 596 15.8333333 2.916667 16.694468
## 597 16.8750000 2.916667 17.717111
## 598 17.9166667 2.916667 18.741851
## 599 18.9583333 2.916667 19.768359
## 600 20.0000000 2.916667 20.796376
## 601 -5.0000000 3.000000 5.290068
## 602 -3.9583333 3.000000 4.552767
## 603 -2.9166667 3.000000 3.954833
## 604 -1.8750000 3.000000 3.567051
## 605 -0.8333333 3.000000 3.460801
## 606 0.2083333 3.000000 3.660679
## 607 1.2500000 3.000000 4.122395
## 608 2.2916667 3.000000 4.770519
## 609 3.3333333 3.000000 5.540009
## 610 4.3750000 3.000000 6.387150
## 611 5.4166667 3.000000 7.284903
## 612 6.4583333 3.000000 8.216694
## 613 7.5000000 3.000000 9.172157
## 614 8.5416667 3.000000 10.144605
## 615 9.5833333 3.000000 11.129586
## 616 10.6250000 3.000000 12.124047
## 617 11.6666667 3.000000 13.125832
## 618 12.7083333 3.000000 14.133385
## 619 13.7500000 3.000000 15.145554
## 620 14.7916667 3.000000 16.161472
## 621 15.8333333 3.000000 17.180474
## 622 16.8750000 3.000000 18.202042
## 623 17.9166667 3.000000 19.225767
## 624 18.9583333 3.000000 20.251322
## 625 20.0000000 3.000000 21.278443
grid %>%
ggplot(aes(a1,a2))+
geom_point(
data = filter(grid, rank(dist)<=10),
size = 4, color = "red"
) +
geom_point(aes(color = -dist))
ggplot(sim1, aes(x,y)) +
geom_point(size = 2, color = "grey30") +
geom_abline(
aes(intercept = a1, slope = a2, color = -dist),
data = filter(grid, rank(dist)<=10)
)
best <- optim(c(0,0), measure_distance, data = sim1)
best$par
## [1] 4.222248 2.051204
ggplot(sim1, aes(x,y)) +
geom_point(size = 2, color = "grey30") +
geom_abline(intercept = best$par[1], slope = best$par[2])
sim1_mod <- lm(y~x, data = sim1)
coef(sim1_mod)
## (Intercept) x
## 4.220822 2.051533
grid <- sim1 %>%
data_grid(x)
grid
## # A tibble: 10 x 1
## x
## <int>
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
sim1
## # A tibble: 30 x 2
## x y
## <int> <dbl>
## 1 1 4.20
## 2 1 7.51
## 3 1 2.13
## 4 2 8.99
## 5 2 10.2
## 6 2 11.3
## 7 3 7.36
## 8 3 10.5
## 9 3 10.5
## 10 4 12.4
## # … with 20 more rows
grid <- grid %>%
add_predictions(sim1_mod)
grid
## # A tibble: 10 x 2
## x pred
## <int> <dbl>
## 1 1 6.27
## 2 2 8.32
## 3 3 10.4
## 4 4 12.4
## 5 5 14.5
## 6 6 16.5
## 7 7 18.6
## 8 8 20.6
## 9 9 22.7
## 10 10 24.7
ggplot(sim1, aes(x)) +
geom_point(aes(y = y)) +
geom_line(
aes(y = pred),
data = grid,
color = "red",
size = 1
)
sim1 <- sim1 %>%
add_residuals(sim1_mod)
sim1
## # A tibble: 30 x 3
## x y resid
## <int> <dbl> <dbl>
## 1 1 4.20 -2.07
## 2 1 7.51 1.24
## 3 1 2.13 -4.15
## 4 2 8.99 0.665
## 5 2 10.2 1.92
## 6 2 11.3 2.97
## 7 3 7.36 -3.02
## 8 3 10.5 0.130
## 9 3 10.5 0.136
## 10 4 12.4 0.00763
## # … with 20 more rows
ggplot(sim1, aes(resid)) +
geom_freqpoly(binwidth = 0.5)
ggplot(sim1, aes(x, resid)) +
geom_ref_line(h = 0) +
geom_point()
df <- tribble(
~y, ~x1, ~x2,
4,2,5,
5,1,6
)
model_matrix(df, y ~ x1)
## # A tibble: 2 x 2
## `(Intercept)` x1
## <dbl> <dbl>
## 1 1 2
## 2 1 1
model_matrix(df, y ~ x1 - 1)
## # A tibble: 2 x 1
## x1
## <dbl>
## 1 2
## 2 1
model_matrix(df, y ~ x1 + x2)
## # A tibble: 2 x 3
## `(Intercept)` x1 x2
## <dbl> <dbl> <dbl>
## 1 1 2 5
## 2 1 1 6
df <- tribble(
~sex, ~response,
"male",1,
"female",2,
"male",1
)
model_matrix(df, response ~ sex)
## # A tibble: 3 x 2
## `(Intercept)` sexmale
## <dbl> <dbl>
## 1 1 1
## 2 1 0
## 3 1 1
ggplot(sim2) +
geom_point(aes(x,y))
mod2 <- lm(y ~ x, data = sim2)
grid <- sim2 %>%
data_grid(x) %>%
add_predictions(mod2)
grid
## # A tibble: 4 x 2
## x pred
## <chr> <dbl>
## 1 a 1.15
## 2 b 8.12
## 3 c 6.13
## 4 d 1.91
ggplot(sim2, aes(x)) +
geom_point(aes(y = y)) +
geom_point(
data = grid,
aes(y = pred),
color = "red",
size = 4
)
tibble(x = "e") %>%
add_predictions(mod2)
## Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels): factor x has new level e
ggplot(sim3, aes(x1,y)) +
geom_point(aes(color = x2))
mod1 <- lm(y ~ x1 + x2, data = sim3)
mod2 <- lm(y ~ x1 * x2, data = sim3)
grid <- sim3 %>%
data_grid(x1,x2) %>%
gather_predictions(mod1,mod2)
grid
## # A tibble: 80 x 4
## model x1 x2 pred
## <chr> <int> <fct> <dbl>
## 1 mod1 1 a 1.67
## 2 mod1 1 b 4.56
## 3 mod1 1 c 6.48
## 4 mod1 1 d 4.03
## 5 mod1 2 a 1.48
## 6 mod1 2 b 4.37
## 7 mod1 2 c 6.28
## 8 mod1 2 d 3.84
## 9 mod1 3 a 1.28
## 10 mod1 3 b 4.17
## # … with 70 more rows
ggplot(sim3, aes(x1,y,color = x2)) +
geom_point() +
geom_line(data = grid, aes(y = pred)) +
facet_wrap(~model)
sim3 <- sim3 %>%
gather_residuals(mod1, mod2)
ggplot(sim3, aes(x1, resid, color = x2)) +
geom_point() +
facet_grid(model ~ x2)
mod1 <- lm(y ~ x1 + x2, data = sim4)
mod2 <- lm(y ~ x1 * x2, data = sim4)
grid <- sim4 %>%
data_grid(
x1 = seq_range(x1,5),
x2 = seq_range(x2,5)
) %>%
gather_predictions(mod1, mod2)
grid
## # A tibble: 50 x 4
## model x1 x2 pred
## <chr> <dbl> <dbl> <dbl>
## 1 mod1 -1 -1 0.996
## 2 mod1 -1 -0.5 -0.395
## 3 mod1 -1 0 -1.79
## 4 mod1 -1 0.5 -3.18
## 5 mod1 -1 1 -4.57
## 6 mod1 -0.5 -1 1.91
## 7 mod1 -0.5 -0.5 0.516
## 8 mod1 -0.5 0 -0.875
## 9 mod1 -0.5 0.5 -2.27
## 10 mod1 -0.5 1 -3.66
## # … with 40 more rows
seq_range(c(0.0123,0.923423),n=5)
## [1] 0.0123000 0.2400808 0.4678615 0.6956423 0.9234230
seq_range(c(0.0123,0.923423),n=5,pretty=TRUE)
## [1] 0.0 0.2 0.4 0.6 0.8 1.0
x1 <- rcauchy(100)
seq_range(x1,n=5)
## [1] -34.812171 -21.321906 -7.831641 5.658625 19.148890
seq_range(x1,n=5,trim=0.1)
## [1] -4.3109721 -2.4428614 -0.5747507 1.2933601 3.1614708
seq_range(x1,n=5,trim=0.25)
## [1] -2.8922244 -1.9190161 -0.9458079 0.0274004 1.0006087
seq_range(x1,n=5,trim=0.5)
## [1] -1.71863268 -1.17662139 -0.63461010 -0.09259882 0.44941247
x2 <- c(0,1)
seq_range(x2, n = 5)
## [1] 0.00 0.25 0.50 0.75 1.00
seq_range(x2,n=5,expand=0.1)
## [1] -0.050 0.225 0.500 0.775 1.050
seq_range(x2,n=5,expand=0.25)
## [1] -0.1250 0.1875 0.5000 0.8125 1.1250
seq_range(x2,n=5,expand=0.5)
## [1] -0.250 0.125 0.500 0.875 1.250
ggplot(grid, aes(x1,x2)) +
geom_tile(aes(fill = pred)) +
facet_wrap(~model)
ggplot(grid, aes(x1,pred,color = x2,group = x2)) +
geom_line() +
facet_wrap(~model)
ggplot(grid, aes(x2, pred, color = x1, group = x1)) +
geom_line() +
facet_wrap(~model)
df <- tribble(
~y, ~x,
1,1,
2,2,
3,3
)
model_matrix(df, y ~ x^2 + x)
## # A tibble: 3 x 2
## `(Intercept)` x
## <dbl> <dbl>
## 1 1 1
## 2 1 2
## 3 1 3
model_matrix(df, y ~ I(x^2) + x)
## # A tibble: 3 x 3
## `(Intercept)` `I(x^2)` x
## <dbl> <dbl> <dbl>
## 1 1 1 1
## 2 1 4 2
## 3 1 9 3
model_matrix(df, y ~ poly(x,2))
## # A tibble: 3 x 3
## `(Intercept)` `poly(x, 2)1` `poly(x, 2)2`
## <dbl> <dbl> <dbl>
## 1 1 -7.07e- 1 0.408
## 2 1 -7.85e-17 -0.816
## 3 1 7.07e- 1 0.408
library(splines)
model_matrix(df, y ~ ns(x,2))
## # A tibble: 3 x 3
## `(Intercept)` `ns(x, 2)1` `ns(x, 2)2`
## <dbl> <dbl> <dbl>
## 1 1 0 0
## 2 1 0.566 -0.211
## 3 1 0.344 0.771
sim5 <- tibble(
x = seq(0, 3.5 * pi, length = 50),
y = 4 * sin(x) + rnorm(length(x))
)
ggplot(sim5, aes(x,y)) +
geom_point()
mod1 <- lm(y ~ ns(x, 1), data = sim5)
mod2 <- lm(y ~ ns(x, 2), data = sim5)
mod3 <- lm(y ~ ns(x, 3), data = sim5)
mod4 <- lm(y ~ ns(x, 4), data = sim5)
mod5 <- lm(y ~ ns(x, 5), data = sim5)
grid <- sim5 %>%
data_grid(x = seq_range(x,n=50,expand=0.1)) %>%
gather_predictions(mod1,mod2,mod3,mod4,mod5,.pred="y")
ggplot(sim5, aes(x,y)) +
geom_point() +
geom_line(data = grid, color = "red") +
facet_wrap(~model)
df <- tribble(
~x, ~y,
1,2.2,
2,NA,
3,3.5,
4,8.3,
NA,10
)
mod <- lm(y ~ x, data = df)
## Warning: Dropping 2 rows with missing values
mod <- lm(y ~ x, data = df, na.action = na.exclude)
nobs(mod)
## [1] 3
# generalized linear models
# generalized addictive models
# penalized linear models
# robust linear models
# trees
library(tidyverse)
library(modelr)
options(na.action = na.warn)
library(nycflights13)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
ggplot(diamonds, aes(cut, price)) + geom_boxplot()
ggplot(diamonds, aes(color, price)) + geom_boxplot()
ggplot(diamonds, aes(clarity, price)) + geom_boxplot()
ggplot(diamonds, aes(carat, price)) + geom_hex(bins = 50)
## Warning: Removed 1 rows containing non-finite values (stat_binhex).
diamonds2 <- diamonds %>%
filter(carat <= 2.5) %>%
mutate(lprice = log2(price), lcarat = log2(carat))
ggplot(diamonds2, aes(lcarat, lprice)) + geom_hex(bins = 50)
mod_diamond <- lm(lprice ~ lcarat, data = diamonds2)
grid <- diamonds2 %>%
data_grid(carat = seq_range(carat,20)) %>%
mutate(lcarat = log2(carat)) %>%
add_predictions(mod_diamond,"lprice") %>%
mutate(price = 2 ^ lprice)
ggplot(diamonds2, aes(carat, price)) +
geom_hex(bins = 50) +
geom_line(data = grid, color = "red", size = 1)
diamonds2 <- diamonds2 %>%
add_residuals(mod_diamond, "lresid")
ggplot(diamonds2, aes(lcarat, lresid)) +
geom_hex(bins = 50)
ggplot(diamonds2, aes(cut, lresid)) + geom_boxplot()
ggplot(diamonds2, aes(color, lresid)) + geom_boxplot()
ggplot(diamonds2, aes(clarity, lresid)) + geom_boxplot()
mod_diamond2 <- lm(
lprice ~ lcarat + color + cut + clarity,
data = diamonds2
)
grid <- diamonds2 %>%
data_grid(cut, .model = mod_diamond2) %>%
add_predictions(mod_diamond2)
grid
## # A tibble: 5 x 5
## cut lcarat color clarity pred
## <ord> <dbl> <chr> <chr> <dbl>
## 1 Fair -0.515 G VS2 11.2
## 2 Good -0.515 G VS2 11.3
## 3 Very Good -0.515 G VS2 11.4
## 4 Premium -0.515 G VS2 11.4
## 5 Ideal -0.515 G VS2 11.4
ggplot(grid, aes(cut, pred)) +
geom_point()
diamonds2 <- diamonds2 %>%
add_residuals(mod_diamond2,"lresid2")
ggplot(diamonds2, aes(lcarat, lresid2)) +
geom_hex(bins = 50)
diamonds2 %>%
filter(abs(lresid2) > 1) %>%
add_predictions(mod_diamond2) %>%
mutate(pred = round(2^pred)) %>%
select(price, pred, carat:table, x:z) %>%
arrange(price)
## # A tibble: 16 x 11
## price pred carat cut color clarity depth table x y z
## <int> <dbl> <dbl> <ord> <ord> <ord> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1013 264 0.25 Fair F SI2 54.4 64 4.3 4.23 2.32
## 2 1186 284 0.25 Premium G SI2 59 60 5.33 5.28 3.12
## 3 1186 284 0.25 Premium G SI2 58.8 60 5.33 5.28 3.12
## 4 1262 2644 1.03 Fair E I1 78.2 54 5.72 5.59 4.42
## 5 1415 639 0.35 Fair G VS2 65.9 54 5.57 5.53 3.66
## 6 1415 639 0.35 Fair G VS2 65.9 54 5.57 5.53 3.66
## 7 1715 576 0.32 Fair F VS2 59.6 60 4.42 4.34 2.61
## 8 1776 412 0.290 Fair F SI1 55.8 60 4.48 4.41 2.48
## 9 2160 314 0.34 Fair F I1 55.8 62 4.72 4.6 2.6
## 10 2366 775 0.3 Very Good D VVS2 60.6 58 4.33 4.35 2.63
## 11 3360 1373 0.51 Premium F SI1 62.7 62 5.09 4.96 3.15
## 12 3807 1540 0.61 Good F SI2 62.5 65 5.36 5.29 3.33
## 13 3920 1705 0.51 Fair F VVS2 65.4 60 4.98 4.9 3.23
## 14 4368 1705 0.51 Fair F VVS2 60.7 66 5.21 5.11 3.13
## 15 10011 4048 1.01 Fair D SI2 64.6 58 6.25 6.2 4.02
## 16 10470 23622 2.46 Premium E SI2 59.7 59 8.82 8.76 5.25
daily <- flights %>%
mutate(date = make_date(year, month, day)) %>%
group_by(date) %>%
summarize(n = n())
daily
## # A tibble: 365 x 2
## date n
## <date> <int>
## 1 2013-01-01 842
## 2 2013-01-02 943
## 3 2013-01-03 914
## 4 2013-01-04 915
## 5 2013-01-05 720
## 6 2013-01-06 832
## 7 2013-01-07 933
## 8 2013-01-08 899
## 9 2013-01-09 902
## 10 2013-01-10 932
## # … with 355 more rows
ggplot(daily, aes(date, n)) + geom_line()
daily <- daily %>%
mutate(wday = wday(date, label = TRUE))
ggplot(daily, aes(wday, n)) +
geom_boxplot()
mod <- lm(n ~ wday, data = daily)
grid <- daily %>%
data_grid(wday) %>%
add_predictions(mod, "n")
ggplot(daily, aes(wday, n)) +
geom_boxplot() +
geom_point(data = grid, color = "red", size = 4)
daily <- daily %>%
add_residuals(mod)
daily %>%
ggplot(aes(date, resid)) +
geom_ref_line(h = 0) +
geom_line()
ggplot(daily, aes(date, resid, color = wday)) +
geom_ref_line(h = 0) +
geom_line()
daily %>%
filter(resid < -100)
## # A tibble: 11 x 4
## date n wday resid
## <date> <int> <ord> <dbl>
## 1 2013-01-01 842 Tue -109.
## 2 2013-01-20 786 Sun -105.
## 3 2013-05-26 729 Sun -162.
## 4 2013-07-04 737 Thu -229.
## 5 2013-07-05 822 Fri -145.
## 6 2013-09-01 718 Sun -173.
## 7 2013-11-28 634 Thu -332.
## 8 2013-11-29 661 Fri -306.
## 9 2013-12-24 761 Tue -190.
## 10 2013-12-25 719 Wed -244.
## 11 2013-12-31 776 Tue -175.
daily %>%
ggplot(aes(date, resid)) +
geom_ref_line(h = 0) +
geom_line(color = "grey50") +
geom_smooth(se = FALSE, span = 0.2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
daily %>%
filter(wday == "Sat") %>%
ggplot(aes(date, n)) +
geom_point() +
geom_line() +
scale_x_date(
NULL,
date_breaks = "1 month",
date_labels = "%b"
)
term <- function(date){
cut(date,
breaks = ymd(20130101, 20130605, 20130825, 20140101),
labels = c("spring","summer","fall"))
}
daily <- daily %>%
mutate(term = term(date))
daily %>%
filter(wday == "Sat") %>%
ggplot(aes(date, n, color = term)) +
geom_point(alpha = 1/3) +
geom_line() +
scale_x_date(
NULL,
date_breaks = "1 month",
date_labels = "%b"
)
daily %>%
ggplot(aes(wday, n, color = term)) +
geom_boxplot()
mod1 <- lm(n ~ wday, data = daily)
mod2 <- lm(n ~ wday * term, data = daily)
daily %>%
gather_residuals(without_term = mod1, with_term = mod2) %>%
ggplot(aes(date, resid, color = model)) +
geom_line(alpha = 0.75)
grid <- daily %>%
data_grid(wday, term) %>%
add_predictions(mod2, "n")
ggplot(daily, aes(wday, n)) +
geom_boxplot() +
geom_point(data = grid, color = "red") +
facet_wrap(~term)
mod3 <- MASS::rlm(n ~ wday * term, data = daily)
daily %>%
add_residuals(mod3, "resid") %>%
ggplot(aes(date, resid)) +
geom_hline(yintercept = 0, size = 2, color = "white") +
geom_line()
compute_vars <- function(data){
data %>%
mutate(
term = term(date),
wday = wday(date, label = TRUE)
)
}
wday2 <- function(x) wday(x, label = TRUE)
mod3 <- lm(n ~ wday2(date) * term(date), data = daily)
library(splines)
mod <- MASS::rlm(n ~ wday * ns(date,5), data = daily)
daily %>%
data_grid(wday, date = seq_range(date, n = 13)) %>%
add_predictions(mod) %>%
ggplot(aes(date, pred, color = wday)) +
geom_line() +
geom_point()
library(modelr)
library(tidyverse)
library(gapminder)
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # … with 1,694 more rows
gapminder %>%
ggplot(aes(year, lifeExp, group = country)) +
geom_line(alpha = 1/3)
nz <- filter(gapminder, country == "New Zealand")
nz %>%
ggplot(aes(year, lifeExp)) +
geom_line() +
ggtitle("Full data = ")
nz_mod <- lm(lifeExp ~ year, data = nz)
nz %>%
add_predictions(nz_mod) %>%
ggplot(aes(year, pred)) +
geom_line() +
ggtitle("Linear trend + ")
nz %>%
add_residuals(nz_mod) %>%
ggplot(aes(year, resid)) +
geom_hline(yintercept = 0, color = "white", size = 3) +
geom_line() +
ggtitle("Remaining pattern")
by_country <- gapminder %>%
group_by(country, continent) %>%
nest()
by_country
## # A tibble: 142 x 3
## country continent data
## <fct> <fct> <list>
## 1 Afghanistan Asia <tibble [12 × 4]>
## 2 Albania Europe <tibble [12 × 4]>
## 3 Algeria Africa <tibble [12 × 4]>
## 4 Angola Africa <tibble [12 × 4]>
## 5 Argentina Americas <tibble [12 × 4]>
## 6 Australia Oceania <tibble [12 × 4]>
## 7 Austria Europe <tibble [12 × 4]>
## 8 Bahrain Asia <tibble [12 × 4]>
## 9 Bangladesh Asia <tibble [12 × 4]>
## 10 Belgium Europe <tibble [12 × 4]>
## # … with 132 more rows
by_country$data[[1]]
## # A tibble: 12 x 4
## year lifeExp pop gdpPercap
## <int> <dbl> <int> <dbl>
## 1 1952 28.8 8425333 779.
## 2 1957 30.3 9240934 821.
## 3 1962 32.0 10267083 853.
## 4 1967 34.0 11537966 836.
## 5 1972 36.1 13079460 740.
## 6 1977 38.4 14880372 786.
## 7 1982 39.9 12881816 978.
## 8 1987 40.8 13867957 852.
## 9 1992 41.7 16317921 649.
## 10 1997 41.8 22227415 635.
## 11 2002 42.1 25268405 727.
## 12 2007 43.8 31889923 975.
country_model <- function(df){
lm(lifeExp ~ year, data = df)
}
models <- map(by_country$data, country_model)
by_country <- by_country %>%
mutate(model = map(data, country_model))
by_country
## # A tibble: 142 x 4
## country continent data model
## <fct> <fct> <list> <list>
## 1 Afghanistan Asia <tibble [12 × 4]> <S3: lm>
## 2 Albania Europe <tibble [12 × 4]> <S3: lm>
## 3 Algeria Africa <tibble [12 × 4]> <S3: lm>
## 4 Angola Africa <tibble [12 × 4]> <S3: lm>
## 5 Argentina Americas <tibble [12 × 4]> <S3: lm>
## 6 Australia Oceania <tibble [12 × 4]> <S3: lm>
## 7 Austria Europe <tibble [12 × 4]> <S3: lm>
## 8 Bahrain Asia <tibble [12 × 4]> <S3: lm>
## 9 Bangladesh Asia <tibble [12 × 4]> <S3: lm>
## 10 Belgium Europe <tibble [12 × 4]> <S3: lm>
## # … with 132 more rows
by_country %>%
filter(continent == "Europe")
## # A tibble: 30 x 4
## country continent data model
## <fct> <fct> <list> <list>
## 1 Albania Europe <tibble [12 × 4]> <S3: lm>
## 2 Austria Europe <tibble [12 × 4]> <S3: lm>
## 3 Belgium Europe <tibble [12 × 4]> <S3: lm>
## 4 Bosnia and Herzegovina Europe <tibble [12 × 4]> <S3: lm>
## 5 Bulgaria Europe <tibble [12 × 4]> <S3: lm>
## 6 Croatia Europe <tibble [12 × 4]> <S3: lm>
## 7 Czech Republic Europe <tibble [12 × 4]> <S3: lm>
## 8 Denmark Europe <tibble [12 × 4]> <S3: lm>
## 9 Finland Europe <tibble [12 × 4]> <S3: lm>
## 10 France Europe <tibble [12 × 4]> <S3: lm>
## # … with 20 more rows
by_country %>%
arrange(continent, country)
## # A tibble: 142 x 4
## country continent data model
## <fct> <fct> <list> <list>
## 1 Algeria Africa <tibble [12 × 4]> <S3: lm>
## 2 Angola Africa <tibble [12 × 4]> <S3: lm>
## 3 Benin Africa <tibble [12 × 4]> <S3: lm>
## 4 Botswana Africa <tibble [12 × 4]> <S3: lm>
## 5 Burkina Faso Africa <tibble [12 × 4]> <S3: lm>
## 6 Burundi Africa <tibble [12 × 4]> <S3: lm>
## 7 Cameroon Africa <tibble [12 × 4]> <S3: lm>
## 8 Central African Republic Africa <tibble [12 × 4]> <S3: lm>
## 9 Chad Africa <tibble [12 × 4]> <S3: lm>
## 10 Comoros Africa <tibble [12 × 4]> <S3: lm>
## # … with 132 more rows
by_country <- by_country %>%
mutate(
resids = map2(data, model, add_residuals)
)
by_country
## # A tibble: 142 x 5
## country continent data model resids
## <fct> <fct> <list> <list> <list>
## 1 Afghanistan Asia <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 2 Albania Europe <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 3 Algeria Africa <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 4 Angola Africa <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 5 Argentina Americas <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 6 Australia Oceania <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 7 Austria Europe <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 8 Bahrain Asia <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 9 Bangladesh Asia <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 10 Belgium Europe <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## # … with 132 more rows
by_country$resids[[1]]
## # A tibble: 12 x 5
## year lifeExp pop gdpPercap resid
## <int> <dbl> <int> <dbl> <dbl>
## 1 1952 28.8 8425333 779. -1.11
## 2 1957 30.3 9240934 821. -0.952
## 3 1962 32.0 10267083 853. -0.664
## 4 1967 34.0 11537966 836. -0.0172
## 5 1972 36.1 13079460 740. 0.674
## 6 1977 38.4 14880372 786. 1.65
## 7 1982 39.9 12881816 978. 1.69
## 8 1987 40.8 13867957 852. 1.28
## 9 1992 41.7 16317921 649. 0.754
## 10 1997 41.8 22227415 635. -0.534
## 11 2002 42.1 25268405 727. -1.54
## 12 2007 43.8 31889923 975. -1.22
resids <- unnest(by_country, resids)
resids
## # A tibble: 1,704 x 7
## country continent year lifeExp pop gdpPercap resid
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779. -1.11
## 2 Afghanistan Asia 1957 30.3 9240934 821. -0.952
## 3 Afghanistan Asia 1962 32.0 10267083 853. -0.664
## 4 Afghanistan Asia 1967 34.0 11537966 836. -0.0172
## 5 Afghanistan Asia 1972 36.1 13079460 740. 0.674
## 6 Afghanistan Asia 1977 38.4 14880372 786. 1.65
## 7 Afghanistan Asia 1982 39.9 12881816 978. 1.69
## 8 Afghanistan Asia 1987 40.8 13867957 852. 1.28
## 9 Afghanistan Asia 1992 41.7 16317921 649. 0.754
## 10 Afghanistan Asia 1997 41.8 22227415 635. -0.534
## # … with 1,694 more rows
resids %>%
ggplot(aes(year, resid)) +
geom_line(aes(group = country), alpha = 1/3) +
geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
resids %>%
ggplot(aes(year, resid, group = country)) +
geom_line(alpha = 1/3) +
facet_wrap(~continent)
broom::glance(nz_mod)
## # A tibble: 1 x 11
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.954 0.949 0.804 205. 5.41e-8 2 -13.3 32.6 34.1
## # … with 2 more variables: deviance <dbl>, df.residual <int>
by_country %>%
mutate(glance = map(model, broom::glance)) %>%
unnest(glance)
## # A tibble: 142 x 16
## country continent data model resids r.squared adj.r.squared sigma
## <fct> <fct> <lis> <lis> <list> <dbl> <dbl> <dbl>
## 1 Afghan… Asia <tib… <S3:… <tibb… 0.948 0.942 1.22
## 2 Albania Europe <tib… <S3:… <tibb… 0.911 0.902 1.98
## 3 Algeria Africa <tib… <S3:… <tibb… 0.985 0.984 1.32
## 4 Angola Africa <tib… <S3:… <tibb… 0.888 0.877 1.41
## 5 Argent… Americas <tib… <S3:… <tibb… 0.996 0.995 0.292
## 6 Austra… Oceania <tib… <S3:… <tibb… 0.980 0.978 0.621
## 7 Austria Europe <tib… <S3:… <tibb… 0.992 0.991 0.407
## 8 Bahrain Asia <tib… <S3:… <tibb… 0.967 0.963 1.64
## 9 Bangla… Asia <tib… <S3:… <tibb… 0.989 0.988 0.977
## 10 Belgium Europe <tib… <S3:… <tibb… 0.995 0.994 0.293
## # … with 132 more rows, and 8 more variables: statistic <dbl>,
## # p.value <dbl>, df <int>, logLik <dbl>, AIC <dbl>, BIC <dbl>,
## # deviance <dbl>, df.residual <int>
glance <- by_country %>%
mutate(glance = map(model, broom::glance)) %>%
unnest(glance, .drop = TRUE)
glance
## # A tibble: 142 x 13
## country continent r.squared adj.r.squared sigma statistic p.value df
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 Afghan… Asia 0.948 0.942 1.22 181. 9.84e- 8 2
## 2 Albania Europe 0.911 0.902 1.98 102. 1.46e- 6 2
## 3 Algeria Africa 0.985 0.984 1.32 662. 1.81e-10 2
## 4 Angola Africa 0.888 0.877 1.41 79.1 4.59e- 6 2
## 5 Argent… Americas 0.996 0.995 0.292 2246. 4.22e-13 2
## 6 Austra… Oceania 0.980 0.978 0.621 481. 8.67e-10 2
## 7 Austria Europe 0.992 0.991 0.407 1261. 7.44e-12 2
## 8 Bahrain Asia 0.967 0.963 1.64 291. 1.02e- 8 2
## 9 Bangla… Asia 0.989 0.988 0.977 930. 3.37e-11 2
## 10 Belgium Europe 0.995 0.994 0.293 1822. 1.20e-12 2
## # … with 132 more rows, and 5 more variables: logLik <dbl>, AIC <dbl>,
## # BIC <dbl>, deviance <dbl>, df.residual <int>
glance %>%
arrange(r.squared)
## # A tibble: 142 x 13
## country continent r.squared adj.r.squared sigma statistic p.value df
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 Rwanda Africa 0.0172 -0.0811 6.56 0.175 0.685 2
## 2 Botswa… Africa 0.0340 -0.0626 6.11 0.352 0.566 2
## 3 Zimbab… Africa 0.0562 -0.0381 7.21 0.596 0.458 2
## 4 Zambia Africa 0.0598 -0.0342 4.53 0.636 0.444 2
## 5 Swazil… Africa 0.0682 -0.0250 6.64 0.732 0.412 2
## 6 Lesotho Africa 0.0849 -0.00666 5.93 0.927 0.358 2
## 7 Cote d… Africa 0.283 0.212 3.93 3.95 0.0748 2
## 8 South … Africa 0.312 0.244 4.74 4.54 0.0588 2
## 9 Uganda Africa 0.342 0.276 3.19 5.20 0.0457 2
## 10 Congo,… Africa 0.348 0.283 2.43 5.34 0.0434 2
## # … with 132 more rows, and 5 more variables: logLik <dbl>, AIC <dbl>,
## # BIC <dbl>, deviance <dbl>, df.residual <int>
glance %>%
ggplot(aes(continent, r.squared)) +
geom_jitter(width = 0.5)
bad_fit <- filter(glance, r.squared < 0.25)
gapminder %>%
semi_join(bad_fit, by = "country") %>%
ggplot(aes(year, lifeExp, color = country)) +
geom_line()
data.frame(x = list(1:3, 3:5))
## x.1.3 x.3.5
## 1 1 3
## 2 2 4
## 3 3 5
data.frame(
x = I(list(1:3, 3:5)),
y = c("1,2","3,4,5")
)
## x y
## 1 1, 2, 3 1,2
## 2 3, 4, 5 3,4,5
tibble(
x = list(1:3, 3:5),
y = c("1,2", "3,4,5")
)
## # A tibble: 2 x 2
## x y
## <list> <chr>
## 1 <int [3]> 1,2
## 2 <int [3]> 3,4,5
tribble(
~x, ~y,
1:3, "1,2",
3:5, "3,4,5"
)
## # A tibble: 2 x 2
## x y
## <list> <chr>
## 1 <int [3]> 1,2
## 2 <int [3]> 3,4,5
gapminder %>%
group_by(country, continent) %>%
nest()
## # A tibble: 142 x 3
## country continent data
## <fct> <fct> <list>
## 1 Afghanistan Asia <tibble [12 × 4]>
## 2 Albania Europe <tibble [12 × 4]>
## 3 Algeria Africa <tibble [12 × 4]>
## 4 Angola Africa <tibble [12 × 4]>
## 5 Argentina Americas <tibble [12 × 4]>
## 6 Australia Oceania <tibble [12 × 4]>
## 7 Austria Europe <tibble [12 × 4]>
## 8 Bahrain Asia <tibble [12 × 4]>
## 9 Bangladesh Asia <tibble [12 × 4]>
## 10 Belgium Europe <tibble [12 × 4]>
## # … with 132 more rows
gapminder %>%
nest(year:gdpPercap)
## # A tibble: 142 x 3
## country continent data
## <fct> <fct> <list>
## 1 Afghanistan Asia <tibble [12 × 4]>
## 2 Albania Europe <tibble [12 × 4]>
## 3 Algeria Africa <tibble [12 × 4]>
## 4 Angola Africa <tibble [12 × 4]>
## 5 Argentina Americas <tibble [12 × 4]>
## 6 Australia Oceania <tibble [12 × 4]>
## 7 Austria Europe <tibble [12 × 4]>
## 8 Bahrain Asia <tibble [12 × 4]>
## 9 Bangladesh Asia <tibble [12 × 4]>
## 10 Belgium Europe <tibble [12 × 4]>
## # … with 132 more rows
df <- tribble(
~x1,
"a,b,c",
"d,e,f,g"
)
df
## # A tibble: 2 x 1
## x1
## <chr>
## 1 a,b,c
## 2 d,e,f,g
df %>%
mutate(x2 = stringr::str_split(x1, ","))
## # A tibble: 2 x 2
## x1 x2
## <chr> <list>
## 1 a,b,c <chr [3]>
## 2 d,e,f,g <chr [4]>
df %>%
mutate(x2 = stringr::str_split(x1, ",")) %>%
unnest()
## # A tibble: 7 x 2
## x1 x2
## <chr> <chr>
## 1 a,b,c a
## 2 a,b,c b
## 3 a,b,c c
## 4 d,e,f,g d
## 5 d,e,f,g e
## 6 d,e,f,g f
## 7 d,e,f,g g
sim <- tribble(
~f, ~params,
"runif",list(min = -1, max = -1),
"rnorm",list(sd = 5),
"rpois",list(lambda = 10)
)
sim %>%
mutate(sims = invoke_map(f, params, n = 10))
## # A tibble: 3 x 3
## f params sims
## <chr> <list> <list>
## 1 runif <list [2]> <dbl [10]>
## 2 rnorm <list [1]> <dbl [10]>
## 3 rpois <list [1]> <int [10]>
mtcars %>%
group_by(cyl) %>%
summarize(q = quantile(mpg))
## Error: Column `q` must be length 1 (a summary value), not 5
mtcars %>%
group_by(cyl) %>%
summarize(q = list(quantile(mpg)))
## # A tibble: 3 x 2
## cyl q
## <dbl> <list>
## 1 16 <dbl [5]>
## 2 24 <dbl [5]>
## 3 32 <dbl [5]>
probs <- c(0.01, 0.25, 0.5, 0.75, 0.99)
mtcars %>%
group_by(cyl) %>%
summarize(p = list(probs), q = list(quantile(mpg, probs))) %>%
unnest()
## # A tibble: 15 x 3
## cyl p q
## <dbl> <dbl> <dbl>
## 1 16 0.01 21.4
## 2 16 0.25 22.8
## 3 16 0.5 26
## 4 16 0.75 30.4
## 5 16 0.99 33.8
## 6 24 0.01 17.8
## 7 24 0.25 18.6
## 8 24 0.5 19.7
## 9 24 0.75 21
## 10 24 0.99 21.4
## 11 32 0.01 10.4
## 12 32 0.25 14.4
## 13 32 0.5 15.2
## 14 32 0.75 16.2
## 15 32 0.99 19.1
x <- list(
a = 1:5,
b = 3:4,
c = 5:6
)
df <- enframe(x)
df
## # A tibble: 3 x 2
## name value
## <chr> <list>
## 1 a <int [5]>
## 2 b <int [2]>
## 3 c <int [2]>
df %>%
mutate(
smry = map2_chr(
name,
value, ~ stringr::str_c(.x, ":", .y[1])
)
)
## # A tibble: 3 x 3
## name value smry
## <chr> <list> <chr>
## 1 a <int [5]> a:1
## 2 b <int [2]> b:3
## 3 c <int [2]> c:5
df <- tribble(
~x,
letters[1:5],
1:3,
runif(5)
)
df
## # A tibble: 3 x 1
## x
## <list>
## 1 <chr [5]>
## 2 <int [3]>
## 3 <dbl [5]>
df %>%
mutate(
type = map_chr(x, typeof),
length = map_int(x, length)
)
## # A tibble: 3 x 3
## x type length
## <list> <chr> <int>
## 1 <chr [5]> character 5
## 2 <int [3]> integer 3
## 3 <dbl [5]> double 5
df <- tribble(
~x,
list(a = 1, b = 2),
list(a = 2, c = 4)
)
df
## # A tibble: 2 x 1
## x
## <list>
## 1 <list [2]>
## 2 <list [2]>
df %>%
mutate(
a = map_dbl(x, "a"),
b = map_dbl(x, "b", .null = NA_real_)
)
## # A tibble: 2 x 3
## x a b
## <list> <dbl> <dbl>
## 1 <list [2]> 1 2
## 2 <list [2]> 2 NA
tibble(x = 1:2, y = list(1:4, 1)) %>% unnest(y)
## # A tibble: 5 x 2
## x y
## <int> <dbl>
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 2 1
df1 <- tribble(
~x, ~y, ~z,
1, c("a","b"), 1:2,
2, "c", 3
)
df1
## # A tibble: 2 x 3
## x y z
## <dbl> <list> <list>
## 1 1 <chr [2]> <int [2]>
## 2 2 <chr [1]> <dbl [1]>
df1 %>% unnest(y,z)
## # A tibble: 3 x 3
## x y z
## <dbl> <chr> <dbl>
## 1 1 a 1
## 2 1 b 2
## 3 2 c 3
df2 <- tribble(
~x, ~y, ~z,
1, "a", 1:2,
2, c("b","c"),3
)
df2
## # A tibble: 2 x 3
## x y z
## <dbl> <list> <list>
## 1 1 <chr [1]> <int [2]>
## 2 2 <chr [2]> <dbl [1]>
df2 %>% unnest(y,z)
## All nested columns must have the same number of elements.
##21. R markdown
eval = FALSE,
include = FALSE,
echo = FALSE,
message = FALSE,
warning = FALSE,
results = 'hide',
fig.show = 'hide'
error = TRUE
}
mtcars[1:5, 1:10]
## mpg cyl disp hp drat wt qsec vs am gear
## Mazda RX4 21.0 24 160 110 3.90 2.620 16.46 0 1 4
## Mazda RX4 Wag 21.0 24 160 110 3.90 2.875 17.02 0 1 4
## Datsun 710 22.8 16 108 93 3.85 2.320 18.61 1 1 4
## Hornet 4 Drive 21.4 24 258 110 3.08 3.215 19.44 1 0 3
## Hornet Sportabout 18.7 32 360 175 3.15 3.440 17.02 0 0 3
knitr::kable(
mtcars[1:5,],
caption = "A knitr kable."
)
| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Mazda RX4 | 21.0 | 24 | 160 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
| Mazda RX4 Wag | 21.0 | 24 | 160 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
| Datsun 710 | 22.8 | 16 | 108 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| Hornet 4 Drive | 21.4 | 24 | 258 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| Hornet Sportabout | 18.7 | 32 | 360 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
knitr::clean_cache()
## NULL
# knitr::opts_chunk$set(
# comment = "#>",
# collapse = TRUE)
# knitr::opts_chunk$set(
# echo = FALSE
# )
# we have about `r nrow(diaamonds)` diamonds. Only `r nrow(diamonds) - nrow(smaller)` are larger than 2.5 carats. The distribution of the remainder is shown below:
comma <- function(x) format(x, digits = 2, big.mark = ",")
comma(3452345)
## [1] "3,452,345"
comma(.12358124331)
## [1] "0.12"
# ---
# output: html_document
# params: my_class:"suv"
---
# ```{r setup, include = FALSE}
# library(ggplot2)
# library(dplyr)
# class <- mpg %>% filter(class == params$my_class)
# ```
# # Fuel economy for `r params$my_class`s
# ```{r, message = FALSE}
# ggplot(class, aes(displ, hwy)) +
# geom_point() +
# geom_smooth(se = FALSE)
# ```
## Error: <text>:16:0: unexpected end of input
## 14: # geom_smooth(se = FALSE)
## 15: # ```
## ^
# params:
# start: !r lubridate::ymd("2015-01-01")
# snapshot: !r lubridate::ymd_hms("2015-01-01 12:30:00")
# rmarkdown::render(
# "fuel-economy.Rmd",
# params = list(my_class = "suv")
# )
# reports <- tibble(
# class = unique(mpg$class),
# filename = stringr::str_c("fuel-economy-",class,".html"),
# params = purrr:map(class, ~list(my_class = .))
#)
#reports
#reports %>%
# select(output_file = filename, params) %>%
# purrr::pwalk(rmarkdown::render, input = "fuel-economy.Rmd")
library(tidyverse)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = TRUE) +
labs(
title = paste(
"Fuel efficiency generally decreases with engine size"
)
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
labs(
title = paste(
"Fuel efficiency generally decreases with", "engine size"),
subtitle = paste(
"Two seaters (sports cars) are an exception", "because of their light weight"),
caption = "Data from fueleconomy.gov"
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
labs(
x = "Engine displacement (L)",
y = "Highway fuel economy (mpg)",
color = "Car type"
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
df <- tibble(
x = runif(10),
y = runif(10)
)
ggplot(df, aes(x, y)) +
geom_point() +
labs(
x = quote(sum(x[i] ^ 2, i == 1, n)),
y = quote(alpha + beta + frac(delta, theta))
)
best_in_class <- mpg %>%
group_by(class) %>%
filter(row_number(desc(hwy)) == 1)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_text(aes(label = model), data = best_in_class)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_label(
aes(label = model),
data = best_in_class,
nudge_y = 2,
alpha = 0.5
)
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_point(size = 3, shape = 1, data = best_in_class) +
ggrepel::geom_label_repel(
aes(label = model),
data = best_in_class
)
class_avg <- mpg %>%
group_by(class) %>%
summarize(
displ = median(displ),
hwy = median(hwy)
)
ggplot(mpg, aes(displ, hwy, color = class)) +
ggrepel::geom_label_repel(aes(label = class),
data = class_avg,
size = 6,
label.size = 0,
segment.color = NA
) +
geom_point() +
theme(legend.position = "none")
label <- mpg %>%
summarize(
displ = max(displ),
hwy = max(hwy),
label = paste(
"Increasing engine size is \nrelated to decreasing fuel economy."
)
)
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_text(
aes(label = label),
data = label,
vjust = "top",
hjust = "right"
)
label <- tibble(
displ = Inf,
hwy = Inf,
label = paste(
"Increasing engine size is \nrelated to decreasing fuel economy."
)
)
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_text(
aes(label = label),
data = label,
vjust = "top",
hjust = "right"
)
"Increasing engine size related to decreasing fuel economy." %>%
stringr::str_wrap(width = 40) %>%
writeLines()
## Increasing engine size related to
## decreasing fuel economy.
# geom_hline(), geom_vline(), geom_rect(), geom_segment()
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class))
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
scale_x_continuous() +
scale_y_continuous() +
scale_color_discrete()
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
scale_y_continuous(breaks = seq(15,40,by = 5))
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
scale_x_continuous(labels = NULL) +
scale_y_continuous(labels = NULL)
presidential %>%
mutate(id = 33 + row_number()) %>%
ggplot(aes(start, id)) +
geom_point() +
geom_segment(aes(xend = end, yend = id)) +
scale_x_date(
NULL,
breaks = presidential$start,
date_labels = "'%y"
)
base <- ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class))
base + theme(legend.position = "left")
base + theme(legend.position = "top")
base + theme(legend.position = "bottom")
base + theme(legend.position = "right")
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
theme(legend.position = "bottom") +
guides(
color = guide_legend(
nrow = 1,
override.aes = list(size = 4)
)
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(diamonds, aes(carat, price)) +
geom_bin2d()
## Warning: Removed 1 rows containing non-finite values (stat_bin2d).
ggplot(diamonds, aes(log10(carat), log10(price))) +
geom_bin2d()
## Warning: Removed 1 rows containing non-finite values (stat_bin2d).
ggplot(diamonds, aes(carat, price)) +
geom_bin2d() +
scale_x_log10() +
scale_y_log10()
## Warning: Removed 1 rows containing non-finite values (stat_bin2d).
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv))
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv)) +
scale_color_brewer(palette = "Set1")
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv, shape = drv)) +
scale_color_brewer(palette = "Set1")
presidential %>%
mutate(id = 33 + row_number()) %>%
ggplot(aes(start, id, color = party)) +
geom_point() +
geom_segment(aes(xend = end, yend = id)) +
scale_color_manual(
values = c(Republican = "red", Democratic = "blue")
)
df <- tibble(
x = rnorm(10000),
y = rnorm(10000)
)
ggplot(df, aes(x, y)) +
geom_hex() +
coord_fixed()
ggplot(df, aes(x, y)) +
geom_hex() +
viridis::scale_fill_viridis() +
coord_fixed()
ggplot(mpg, mapping = aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg, mapping = aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth() +
coord_cartesian(xlim = c(5,7), ylim = c(10,30))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
mpg %>%
filter(displ >= 5, displ <= 7, hwy >= 10, hwy <= 30) %>%
ggplot(aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
suv <- mpg %>% filter(class == "suv")
compact <- mpg %>% filter(class == "compact")
ggplot(suv, aes(displ, hwy, color = drv)) + geom_point()
ggplot(compact, aes(displ, hwy, color = drv)) + geom_point()
x_scale <- scale_x_continuous(limits = range(mpg$displ))
y_scale <- scale_y_continuous(limits = range(mpg$hwy))
col_scale <- scale_color_discrete(limits = unique(mpg$drv))
ggplot(suv, aes(displ, hwy, color = drv)) +
geom_point() +
x_scale +
y_scale +
col_scale
ggplot(compact, aes(displ, hwy, color = drv)) +
geom_point() +
x_scale +
y_scale +
col_scale
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth() +
theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth() +
theme_dark()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg, aes(displ, hwy)) + geom_point()
# ggsave("my_plot.pdf)
# fig.width, fig.height, fig.asp, out.width, out.height
# ---
# title: "viridis Demo"
# output: html_document
# ---
# rmarkdown::render(
# "diamond-sizes.Rmd",
# output_format = "word_document"
# )
# flexdashboard
# htmlwidgets
library(leaflet)
leaflet() %>%
setView(174.764, -36.877, zoom = 16) %>%
addTiles() %>%
addMarkers(174.764, -36.877, popup = "Maungawhau")
library(shiny)
textInput("name", "What is your name?")
numericInput("age", "How old are you?", NA, min = 0, max = 150)
#